; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
target triple = "aarch64"
define <vscale x 8 x i1> @reinterpret_test_h(<vscale x 8 x i1> %a) {
; CHECK-LABEL: define <vscale x 8 x i1> @reinterpret_test_h
; CHECK-SAME: (<vscale x 8 x i1> [[A:%.*]]) {
; CHECK-NEXT: ret <vscale x 8 x i1> [[A]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
ret <vscale x 8 x i1> %2
}
; Reinterprets are not redundant because the second reinterpret zeros the
; lanes that don't exist within its input.
define <vscale x 16 x i1> @reinterpret_test_h_rev(<vscale x 16 x i1> %a) {
; CHECK-LABEL: define <vscale x 16 x i1> @reinterpret_test_h_rev
; CHECK-SAME: (<vscale x 16 x i1> [[A:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[A]])
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
ret <vscale x 16 x i1> %2
}
define <vscale x 4 x i1> @reinterpret_test_w(<vscale x 4 x i1> %a) {
; CHECK-LABEL: define <vscale x 4 x i1> @reinterpret_test_w
; CHECK-SAME: (<vscale x 4 x i1> [[A:%.*]]) {
; CHECK-NEXT: ret <vscale x 4 x i1> [[A]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
ret <vscale x 4 x i1> %2
}
; Reinterprets are not redundant because the second reinterpret zeros the
; lanes that don't exist within its input.
define <vscale x 16 x i1> @reinterpret_test_w_rev(<vscale x 16 x i1> %a) {
; CHECK-LABEL: define <vscale x 16 x i1> @reinterpret_test_w_rev
; CHECK-SAME: (<vscale x 16 x i1> [[A:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[A]])
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
ret <vscale x 16 x i1> %2
}
define <vscale x 2 x i1> @reinterpret_test_d(<vscale x 2 x i1> %a) {
; CHECK-LABEL: define <vscale x 2 x i1> @reinterpret_test_d
; CHECK-SAME: (<vscale x 2 x i1> [[A:%.*]]) {
; CHECK-NEXT: ret <vscale x 2 x i1> [[A]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
%2 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %1)
ret <vscale x 2 x i1> %2
}
; Reinterprets are not redundant because the second reinterpret zeros the
; lanes that don't exist within its input.
define <vscale x 16 x i1> @reinterpret_test_d_rev(<vscale x 16 x i1> %a) {
; CHECK-LABEL: define <vscale x 16 x i1> @reinterpret_test_d_rev
; CHECK-SAME: (<vscale x 16 x i1> [[A:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[A]])
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
ret <vscale x 16 x i1> %2
}
define <vscale x 2 x i1> @reinterpret_test_full_chain(<vscale x 2 x i1> %a) {
; CHECK-LABEL: define <vscale x 2 x i1> @reinterpret_test_full_chain
; CHECK-SAME: (<vscale x 2 x i1> [[A:%.*]]) {
; CHECK-NEXT: ret <vscale x 2 x i1> [[A]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
%4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
%5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
%6 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %5)
ret <vscale x 2 x i1> %6
}
; The last two reinterprets are not necessary, since they are doing the same
; work as the first two.
define <vscale x 4 x i1> @reinterpret_test_partial_chain(<vscale x 2 x i1> %a) {
; CHECK-LABEL: define <vscale x 4 x i1> @reinterpret_test_partial_chain
; CHECK-SAME: (<vscale x 2 x i1> [[A:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[A]])
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP2]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
%4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
ret <vscale x 4 x i1> %4
}
; The chain cannot be reduced because of the second reinterpret, which causes
; zeroing.
define <vscale x 8 x i1> @reinterpret_test_irreducible_chain(<vscale x 8 x i1> %a) {
; CHECK-LABEL: define <vscale x 8 x i1> @reinterpret_test_irreducible_chain
; CHECK-SAME: (<vscale x 8 x i1> [[A:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[A]])
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP3]])
; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP4]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
%4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
ret <vscale x 8 x i1> %4
}
; Here, the candidate list is larger than the number of instructions that we
; end up removing.
define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates(<vscale x 8 x i1> %a) {
; CHECK-LABEL: define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates
; CHECK-SAME: (<vscale x 8 x i1> [[A:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[A]])
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP2]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
%4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
ret <vscale x 4 x i1> %4
}
define <vscale x 2 x i1> @reinterpret_reductions(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
; CHECK-LABEL: define <vscale x 2 x i1> @reinterpret_reductions
; CHECK-SAME: (i32 [[COND:%.*]], <vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i1> [[B:%.*]], <vscale x 2 x i1> [[C:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[COND]], label [[BR_PHI_C:%.*]] [
; CHECK-NEXT: i32 43, label [[BR_PHI_A:%.*]]
; CHECK-NEXT: i32 45, label [[BR_PHI_B:%.*]]
; CHECK-NEXT: ]
; CHECK: br_phi_a:
; CHECK-NEXT: br label [[JOIN:%.*]]
; CHECK: br_phi_b:
; CHECK-NEXT: br label [[JOIN]]
; CHECK: br_phi_c:
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: [[PG1:%.*]] = phi <vscale x 2 x i1> [ [[A]], [[BR_PHI_A]] ], [ [[B]], [[BR_PHI_B]] ], [ [[C]], [[BR_PHI_C]] ]
; CHECK-NEXT: ret <vscale x 2 x i1> [[PG1]]
;
entry:
switch i32 %cond, label %br_phi_c [
i32 43, label %br_phi_a
i32 45, label %br_phi_b
]
br_phi_a:
%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
br label %join
br_phi_b:
%b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
br label %join
br_phi_c:
%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
br label %join
join:
%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
ret <vscale x 2 x i1> %pg1
}
; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
define <vscale x 2 x i1> @reinterpret_reductions_1(i32 %cond, <vscale x 2 x i1> %a, <vscale x 4 x i1> %b, <vscale x 2 x i1> %c) {
; CHECK-LABEL: define <vscale x 2 x i1> @reinterpret_reductions_1
; CHECK-SAME: (i32 [[COND:%.*]], <vscale x 2 x i1> [[A:%.*]], <vscale x 4 x i1> [[B:%.*]], <vscale x 2 x i1> [[C:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[COND]], label [[BR_PHI_C:%.*]] [
; CHECK-NEXT: i32 43, label [[BR_PHI_A:%.*]]
; CHECK-NEXT: i32 45, label [[BR_PHI_B:%.*]]
; CHECK-NEXT: ]
; CHECK: br_phi_a:
; CHECK-NEXT: [[A1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[A]])
; CHECK-NEXT: br label [[JOIN:%.*]]
; CHECK: br_phi_b:
; CHECK-NEXT: [[B1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[B]])
; CHECK-NEXT: br label [[JOIN]]
; CHECK: br_phi_c:
; CHECK-NEXT: [[C1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[C]])
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: [[PG:%.*]] = phi <vscale x 16 x i1> [ [[A1]], [[BR_PHI_A]] ], [ [[B1]], [[BR_PHI_B]] ], [ [[C1]], [[BR_PHI_C]] ]
; CHECK-NEXT: [[PG1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
; CHECK-NEXT: ret <vscale x 2 x i1> [[PG1]]
;
entry:
switch i32 %cond, label %br_phi_c [
i32 43, label %br_phi_a
i32 45, label %br_phi_b
]
br_phi_a:
%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
br label %join
br_phi_b:
%b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %b)
br label %join
br_phi_c:
%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
br label %join
join:
%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
ret <vscale x 2 x i1> %pg1
}
; No transform. Similar to the the test above, but here only two of the arguments need to
; be converted to svbool.
define <vscale x 2 x i1> @reinterpret_reductions_2(i32 %cond, <vscale x 2 x i1> %a, <vscale x 16 x i1> %b, <vscale x 2 x i1> %c) {
; CHECK-LABEL: define <vscale x 2 x i1> @reinterpret_reductions_2
; CHECK-SAME: (i32 [[COND:%.*]], <vscale x 2 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]], <vscale x 2 x i1> [[C:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[COND]], label [[BR_PHI_C:%.*]] [
; CHECK-NEXT: i32 43, label [[BR_PHI_A:%.*]]
; CHECK-NEXT: i32 45, label [[BR_PHI_B:%.*]]
; CHECK-NEXT: ]
; CHECK: br_phi_a:
; CHECK-NEXT: [[A1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[A]])
; CHECK-NEXT: br label [[JOIN:%.*]]
; CHECK: br_phi_b:
; CHECK-NEXT: br label [[JOIN]]
; CHECK: br_phi_c:
; CHECK-NEXT: [[C1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[C]])
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: [[PG:%.*]] = phi <vscale x 16 x i1> [ [[A1]], [[BR_PHI_A]] ], [ [[B]], [[BR_PHI_B]] ], [ [[C1]], [[BR_PHI_C]] ]
; CHECK-NEXT: [[PG1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
; CHECK-NEXT: ret <vscale x 2 x i1> [[PG1]]
;
entry:
switch i32 %cond, label %br_phi_c [
i32 43, label %br_phi_a
i32 45, label %br_phi_b
]
br_phi_a:
%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
br label %join
br_phi_b:
br label %join
br_phi_c:
%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
br label %join
join:
%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
ret <vscale x 2 x i1> %pg1
}
; Similar to reinterpret_reductions but the reinterprets remain because the
; original phi cannot be removed (i.e. prefer reinterprets over multiple phis).
define <vscale x 16 x i1> @reinterpret_reductions3(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
; CHECK-LABEL: define <vscale x 16 x i1> @reinterpret_reductions3
; CHECK-SAME: (i32 [[COND:%.*]], <vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i1> [[B:%.*]], <vscale x 2 x i1> [[C:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[COND]], label [[BR_PHI_C:%.*]] [
; CHECK-NEXT: i32 43, label [[BR_PHI_A:%.*]]
; CHECK-NEXT: i32 45, label [[BR_PHI_B:%.*]]
; CHECK-NEXT: ]
; CHECK: br_phi_a:
; CHECK-NEXT: [[A1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[A]])
; CHECK-NEXT: br label [[JOIN:%.*]]
; CHECK: br_phi_b:
; CHECK-NEXT: [[B1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[B]])
; CHECK-NEXT: br label [[JOIN]]
; CHECK: br_phi_c:
; CHECK-NEXT: [[C1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[C]])
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: [[PG:%.*]] = phi <vscale x 16 x i1> [ [[A1]], [[BR_PHI_A]] ], [ [[B1]], [[BR_PHI_B]] ], [ [[C1]], [[BR_PHI_C]] ]
; CHECK-NEXT: ret <vscale x 16 x i1> [[PG]]
;
entry:
switch i32 %cond, label %br_phi_c [
i32 43, label %br_phi_a
i32 45, label %br_phi_b
]
br_phi_a:
%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
br label %join
br_phi_b:
%b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
br label %join
br_phi_c:
%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
br label %join
join:
%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
ret <vscale x 16 x i1> %pg
}
define void @phi_insert_point(<vscale x 4 x i1> %arg, ptr %p) {
; CHECK-LABEL: define void @phi_insert_point
; CHECK-SAME: (<vscale x 4 x i1> [[ARG:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[CONVERT:%.*]] = phi <vscale x 4 x i1> [ [[ARG]], [[ENTRY:%.*]] ], [ zeroinitializer, [[FOR_BODY]] ]
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[IDX_EXT:%.*]] = ashr i64 [[IDX]], 1
; CHECK-NEXT: store <vscale x 4 x i1> [[CONVERT]], ptr [[P]], align 1
; CHECK-NEXT: [[IDX_NEXT]] = or i64 [[IDX_EXT]], 1
; CHECK-NEXT: br label [[FOR_BODY]]
;
entry:
%init = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %arg)
br label %for.body
for.body:
%phi = phi <vscale x 16 x i1> [ %init, %entry ], [ %phi.next, %for.body ]
%idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
%idx.ext = ashr i64 %idx, 1
%convert = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %phi)
store <vscale x 4 x i1> %convert, ptr %p
%idx.next = or i64 %idx.ext, 1
%phi.next = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> zeroinitializer)
br label %for.body
}
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
attributes #0 = { "target-features"="+sve" }