llvm/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -O1 -Werror -o /dev/null %s

#include <arm_sme.h>

// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za8(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
// CHECK-C-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT:    ret void
//
void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
  svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za16(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za16ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT:    ret void
//
void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
  svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za32(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za32ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT:    ret void
//
void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
  svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za64(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za64ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT:    ret void
//
void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
  svld1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za128(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z25test_svld1_hor_vnum_za128ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    ret void
//
void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
  svld1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_ver_hor_za8(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
// CHECK-C-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z22test_svld1_ver_hor_za8ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT:    ret void
//
void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
  svld1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za16(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za16ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT:    ret void
//
void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
  svld1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za32(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za32ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT:    ret void
//
void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
  svld1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za64(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za64ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT:    ret void
//
void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
  svld1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za128(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT:  entry:
// CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-C-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z25test_svld1_ver_vnum_za128ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT:    [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT:    ret void
//
void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
  svld1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
// CHECK: {{.*}}