llvm/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-reinterpret.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -passes=instcombine < %s | FileCheck %s

target triple = "aarch64"

define <vscale x 8 x i1> @reinterpret_test_h(<vscale x 8 x i1> %a) {
; CHECK-LABEL: define <vscale x 8 x i1> @reinterpret_test_h
; CHECK-SAME: (<vscale x 8 x i1> [[A:%.*]]) {
; CHECK-NEXT:    ret <vscale x 8 x i1> [[A]]
;
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
  ret <vscale x 8 x i1> %2
}

; Reinterprets are not redundant because the second reinterpret zeros the
; lanes that don't exist within its input.
define <vscale x 16 x i1> @reinterpret_test_h_rev(<vscale x 16 x i1> %a) {
; CHECK-LABEL: define <vscale x 16 x i1> @reinterpret_test_h_rev
; CHECK-SAME: (<vscale x 16 x i1> [[A:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[A]])
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
;
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
  ret <vscale x 16 x i1> %2
}

define <vscale x 4 x i1> @reinterpret_test_w(<vscale x 4 x i1> %a) {
; CHECK-LABEL: define <vscale x 4 x i1> @reinterpret_test_w
; CHECK-SAME: (<vscale x 4 x i1> [[A:%.*]]) {
; CHECK-NEXT:    ret <vscale x 4 x i1> [[A]]
;
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  ret <vscale x 4 x i1> %2
}

; Reinterprets are not redundant because the second reinterpret zeros the
; lanes that don't exist within its input.
define <vscale x 16 x i1> @reinterpret_test_w_rev(<vscale x 16 x i1> %a) {
; CHECK-LABEL: define <vscale x 16 x i1> @reinterpret_test_w_rev
; CHECK-SAME: (<vscale x 16 x i1> [[A:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[A]])
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
;
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
  ret <vscale x 16 x i1> %2
}

define <vscale x 2 x i1> @reinterpret_test_d(<vscale x 2 x i1> %a) {
; CHECK-LABEL: define <vscale x 2 x i1> @reinterpret_test_d
; CHECK-SAME: (<vscale x 2 x i1> [[A:%.*]]) {
; CHECK-NEXT:    ret <vscale x 2 x i1> [[A]]
;
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  %2 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %1)
  ret <vscale x 2 x i1> %2
}

; Reinterprets are not redundant because the second reinterpret zeros the
; lanes that don't exist within its input.
define <vscale x 16 x i1> @reinterpret_test_d_rev(<vscale x 16 x i1> %a) {
; CHECK-LABEL: define <vscale x 16 x i1> @reinterpret_test_d_rev
; CHECK-SAME: (<vscale x 16 x i1> [[A:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[A]])
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
;
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
  ret <vscale x 16 x i1> %2
}

define <vscale x 2 x i1> @reinterpret_test_full_chain(<vscale x 2 x i1> %a) {
; CHECK-LABEL: define <vscale x 2 x i1> @reinterpret_test_full_chain
; CHECK-SAME: (<vscale x 2 x i1> [[A:%.*]]) {
; CHECK-NEXT:    ret <vscale x 2 x i1> [[A]]
;
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
  %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
  %6 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %5)
  ret <vscale x 2 x i1> %6
}

; The last two reinterprets are not necessary, since they are doing the same
; work as the first two.
define <vscale x 4 x i1> @reinterpret_test_partial_chain(<vscale x 2 x i1> %a) {
; CHECK-LABEL: define <vscale x 4 x i1> @reinterpret_test_partial_chain
; CHECK-SAME: (<vscale x 2 x i1> [[A:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[A]])
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT:    ret <vscale x 4 x i1> [[TMP2]]
;
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
  ret <vscale x 4 x i1> %4
}

; The chain cannot be reduced because of the second reinterpret, which causes
; zeroing.
define <vscale x 8 x i1> @reinterpret_test_irreducible_chain(<vscale x 8 x i1> %a) {
; CHECK-LABEL: define <vscale x 8 x i1> @reinterpret_test_irreducible_chain
; CHECK-SAME: (<vscale x 8 x i1> [[A:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[A]])
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP3]])
; CHECK-NEXT:    ret <vscale x 8 x i1> [[TMP4]]
;
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
  ret <vscale x 8 x i1> %4
}

; Here, the candidate list is larger than the number of instructions that we
; end up removing.
define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates(<vscale x 8 x i1> %a) {
; CHECK-LABEL: define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates
; CHECK-SAME: (<vscale x 8 x i1> [[A:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[A]])
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT:    ret <vscale x 4 x i1> [[TMP2]]
;
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
  ret <vscale x 4 x i1> %4
}

define <vscale x 2 x i1> @reinterpret_reductions(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
; CHECK-LABEL: define <vscale x 2 x i1> @reinterpret_reductions
; CHECK-SAME: (i32 [[COND:%.*]], <vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i1> [[B:%.*]], <vscale x 2 x i1> [[C:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    switch i32 [[COND]], label [[BR_PHI_C:%.*]] [
; CHECK-NEXT:    i32 43, label [[BR_PHI_A:%.*]]
; CHECK-NEXT:    i32 45, label [[BR_PHI_B:%.*]]
; CHECK-NEXT:    ]
; CHECK:       br_phi_a:
; CHECK-NEXT:    br label [[JOIN:%.*]]
; CHECK:       br_phi_b:
; CHECK-NEXT:    br label [[JOIN]]
; CHECK:       br_phi_c:
; CHECK-NEXT:    br label [[JOIN]]
; CHECK:       join:
; CHECK-NEXT:    [[PG1:%.*]] = phi <vscale x 2 x i1> [ [[A]], [[BR_PHI_A]] ], [ [[B]], [[BR_PHI_B]] ], [ [[C]], [[BR_PHI_C]] ]
; CHECK-NEXT:    ret <vscale x 2 x i1> [[PG1]]
;

entry:
  switch i32 %cond, label %br_phi_c [
  i32 43, label %br_phi_a
  i32 45, label %br_phi_b
  ]

br_phi_a:
  %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  br label %join

br_phi_b:
  %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
  br label %join

br_phi_c:
  %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
  br label %join

join:
  %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
  %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
  ret <vscale x 2 x i1> %pg1
}

; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
define <vscale x 2 x i1> @reinterpret_reductions_1(i32 %cond, <vscale x 2 x i1> %a, <vscale x 4 x i1> %b, <vscale x 2 x i1> %c) {
; CHECK-LABEL: define <vscale x 2 x i1> @reinterpret_reductions_1
; CHECK-SAME: (i32 [[COND:%.*]], <vscale x 2 x i1> [[A:%.*]], <vscale x 4 x i1> [[B:%.*]], <vscale x 2 x i1> [[C:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    switch i32 [[COND]], label [[BR_PHI_C:%.*]] [
; CHECK-NEXT:    i32 43, label [[BR_PHI_A:%.*]]
; CHECK-NEXT:    i32 45, label [[BR_PHI_B:%.*]]
; CHECK-NEXT:    ]
; CHECK:       br_phi_a:
; CHECK-NEXT:    [[A1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[A]])
; CHECK-NEXT:    br label [[JOIN:%.*]]
; CHECK:       br_phi_b:
; CHECK-NEXT:    [[B1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[B]])
; CHECK-NEXT:    br label [[JOIN]]
; CHECK:       br_phi_c:
; CHECK-NEXT:    [[C1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[C]])
; CHECK-NEXT:    br label [[JOIN]]
; CHECK:       join:
; CHECK-NEXT:    [[PG:%.*]] = phi <vscale x 16 x i1> [ [[A1]], [[BR_PHI_A]] ], [ [[B1]], [[BR_PHI_B]] ], [ [[C1]], [[BR_PHI_C]] ]
; CHECK-NEXT:    [[PG1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
; CHECK-NEXT:    ret <vscale x 2 x i1> [[PG1]]
;

entry:
  switch i32 %cond, label %br_phi_c [
  i32 43, label %br_phi_a
  i32 45, label %br_phi_b
  ]

br_phi_a:
  %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  br label %join

br_phi_b:
  %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %b)
  br label %join

br_phi_c:
  %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
  br label %join

join:
  %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
  %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
  ret <vscale x 2 x i1> %pg1
}

; No transform. Similar to the the test above, but here only two of the arguments need to
; be converted to svbool.
define <vscale x 2 x i1> @reinterpret_reductions_2(i32 %cond, <vscale x 2 x i1> %a, <vscale x 16 x i1> %b, <vscale x 2 x i1> %c) {
; CHECK-LABEL: define <vscale x 2 x i1> @reinterpret_reductions_2
; CHECK-SAME: (i32 [[COND:%.*]], <vscale x 2 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]], <vscale x 2 x i1> [[C:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    switch i32 [[COND]], label [[BR_PHI_C:%.*]] [
; CHECK-NEXT:    i32 43, label [[BR_PHI_A:%.*]]
; CHECK-NEXT:    i32 45, label [[BR_PHI_B:%.*]]
; CHECK-NEXT:    ]
; CHECK:       br_phi_a:
; CHECK-NEXT:    [[A1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[A]])
; CHECK-NEXT:    br label [[JOIN:%.*]]
; CHECK:       br_phi_b:
; CHECK-NEXT:    br label [[JOIN]]
; CHECK:       br_phi_c:
; CHECK-NEXT:    [[C1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[C]])
; CHECK-NEXT:    br label [[JOIN]]
; CHECK:       join:
; CHECK-NEXT:    [[PG:%.*]] = phi <vscale x 16 x i1> [ [[A1]], [[BR_PHI_A]] ], [ [[B]], [[BR_PHI_B]] ], [ [[C1]], [[BR_PHI_C]] ]
; CHECK-NEXT:    [[PG1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
; CHECK-NEXT:    ret <vscale x 2 x i1> [[PG1]]
;

entry:
  switch i32 %cond, label %br_phi_c [
  i32 43, label %br_phi_a
  i32 45, label %br_phi_b
  ]

br_phi_a:
  %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  br label %join

br_phi_b:
  br label %join

br_phi_c:
  %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
  br label %join

join:
  %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
  %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
  ret <vscale x 2 x i1> %pg1
}

; Similar to reinterpret_reductions but the reinterprets remain because the
; original phi cannot be removed (i.e. prefer reinterprets over multiple phis).
define <vscale x 16 x i1> @reinterpret_reductions3(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
; CHECK-LABEL: define <vscale x 16 x i1> @reinterpret_reductions3
; CHECK-SAME: (i32 [[COND:%.*]], <vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i1> [[B:%.*]], <vscale x 2 x i1> [[C:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    switch i32 [[COND]], label [[BR_PHI_C:%.*]] [
; CHECK-NEXT:    i32 43, label [[BR_PHI_A:%.*]]
; CHECK-NEXT:    i32 45, label [[BR_PHI_B:%.*]]
; CHECK-NEXT:    ]
; CHECK:       br_phi_a:
; CHECK-NEXT:    [[A1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[A]])
; CHECK-NEXT:    br label [[JOIN:%.*]]
; CHECK:       br_phi_b:
; CHECK-NEXT:    [[B1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[B]])
; CHECK-NEXT:    br label [[JOIN]]
; CHECK:       br_phi_c:
; CHECK-NEXT:    [[C1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[C]])
; CHECK-NEXT:    br label [[JOIN]]
; CHECK:       join:
; CHECK-NEXT:    [[PG:%.*]] = phi <vscale x 16 x i1> [ [[A1]], [[BR_PHI_A]] ], [ [[B1]], [[BR_PHI_B]] ], [ [[C1]], [[BR_PHI_C]] ]
; CHECK-NEXT:    ret <vscale x 16 x i1> [[PG]]
;

entry:
  switch i32 %cond, label %br_phi_c [
  i32 43, label %br_phi_a
  i32 45, label %br_phi_b
  ]

br_phi_a:
  %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
  br label %join

br_phi_b:
  %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
  br label %join

br_phi_c:
  %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
  br label %join

join:
  %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
  %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
  ret <vscale x 16 x i1> %pg
}

define void @phi_insert_point(<vscale x 4 x i1> %arg, ptr %p) {
; CHECK-LABEL: define void @phi_insert_point
; CHECK-SAME: (<vscale x 4 x i1> [[ARG:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[CONVERT:%.*]] = phi <vscale x 4 x i1> [ [[ARG]], [[ENTRY:%.*]] ], [ zeroinitializer, [[FOR_BODY]] ]
; CHECK-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[IDX_EXT:%.*]] = ashr i64 [[IDX]], 1
; CHECK-NEXT:    store <vscale x 4 x i1> [[CONVERT]], ptr [[P]], align 1
; CHECK-NEXT:    [[IDX_NEXT]] = or i64 [[IDX_EXT]], 1
; CHECK-NEXT:    br label [[FOR_BODY]]
;
entry:
  %init = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %arg)
  br label %for.body

for.body:
  %phi = phi <vscale x 16 x i1> [ %init, %entry ], [ %phi.next, %for.body ]
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
  %idx.ext = ashr i64 %idx, 1
  %convert = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %phi)
  store <vscale x 4 x i1> %convert, ptr %p
  %idx.next = or i64 %idx.ext, 1
  %phi.next = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> zeroinitializer)
  br label %for.body
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

attributes #0 = { "target-features"="+sve" }