llvm/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>

#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
#else
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
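// For example, SVE_ACLE_FUNC(svst2q,,_u8,) expands to svst2q_u8: the overloaded
// build keeps only the first and third arguments (A1##A3), while the default
// build concatenates all four (A1##A2##A3##A4).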

// CHECK-LABEL: @test_svst2q_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst2q_u8u10__SVBool_tPKh11svuint8x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_u8(svbool_t pg, const uint8_t *base, svuint8x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_u8,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst2q_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst2q_s8u10__SVBool_tPKa10svint8x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_s8(svbool_t pg, const int8_t *base, svint8x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_s8,)(pg, base, zt);
}
// CHECK-LABEL: @test_svst2q_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_u16u10__SVBool_tPKt12svuint16x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_u16(svbool_t pg, const uint16_t *base, svuint16x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_u16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst2q_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_s16u10__SVBool_tPKs11svint16x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_s16(svbool_t pg, const int16_t *base, svint16x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_s16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst2q_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_u32u10__SVBool_tPKj12svuint32x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_u32(svbool_t pg, const uint32_t *base, svuint32x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_u32,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst2q_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_s32u10__SVBool_tPKi11svint32x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_s32(svbool_t pg, const int32_t *base, svint32x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_s32,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst2q_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_u64u10__SVBool_tPKm12svuint64x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_u64(svbool_t pg, const uint64_t *base, svuint64x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_u64,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst2q_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_s64u10__SVBool_tPKl11svint64x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_s64(svbool_t pg, const int64_t *base, svint64x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_s64,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst2q_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_f16u10__SVBool_tPKDh13svfloat16x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_f16(svbool_t pg, const float16_t *base, svfloat16x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_f16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst2q_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z16test_svst2q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_bf16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst2q_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_f32u10__SVBool_tPKf13svfloat32x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_f32(svbool_t pg, const float32_t *base, svfloat32x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_f32,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst2q_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_f64u10__SVBool_tPKd13svfloat64x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_f64(svbool_t pg, const float64_t *base, svfloat64x2_t zt)
{
  SVE_ACLE_FUNC(svst2q,,_f64,)(pg, base, zt);
}

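// The _vnum variants address base plus vnum whole vector lengths, i.e.
// vnum * svcntb() bytes, which is why the IR below multiplies vnum by
// vscale * 16 before the getelementptr.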
// CHECK-LABEL: @test_svst2q_vnum_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst2q_vnum_u8u10__SVBool_tPKhl11svuint8x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_u8,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst2q_vnum_s8u10__SVBool_tPKal10svint8x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_s8,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u16u10__SVBool_tPKtl12svuint16x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_u16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s16u10__SVBool_tPKsl11svint16x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_s16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u32u10__SVBool_tPKjl12svuint32x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_u32,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s32u10__SVBool_tPKil11svint32x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_s32,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u64u10__SVBool_tPKml12svuint64x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_u64,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s64u10__SVBool_tPKll11svint64x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_s64,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f16u10__SVBool_tPKDhl13svfloat16x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_f16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z21test_svst2q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_bf16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f32u10__SVBool_tPKfl13svfloat32x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_f32,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst2q_vnum_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f64u10__SVBool_tPKdl13svfloat64x2_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x2_t zt)
{
  SVE_ACLE_FUNC(svst2q_vnum,,_f64,)(pg, base, vnum, zt);
}

//
// ST3Q
// CHECK-LABEL: @test_svst3q_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst3q_u8u10__SVBool_tPKh11svuint8x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_u8(svbool_t pg, const uint8_t *base, svuint8x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_u8,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst3q_s8u10__SVBool_tPKa10svint8x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_s8(svbool_t pg, const int8_t *base, svint8x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_s8,)(pg, base, zt);
}
// CHECK-LABEL: @test_svst3q_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_u16u10__SVBool_tPKt12svuint16x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_u16(svbool_t pg, const uint16_t *base, svuint16x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_u16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_s16u10__SVBool_tPKs11svint16x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_s16(svbool_t pg, const int16_t *base, svint16x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_s16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_u32u10__SVBool_tPKj12svuint32x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_u32(svbool_t pg, const uint32_t *base, svuint32x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_u32,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_s32u10__SVBool_tPKi11svint32x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_s32(svbool_t pg, const int32_t *base, svint32x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_s32,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_u64u10__SVBool_tPKm12svuint64x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_u64(svbool_t pg, const uint64_t *base, svuint64x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_u64,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_s64u10__SVBool_tPKl11svint64x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_s64(svbool_t pg, const int64_t *base, svint64x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_s64,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_f16u10__SVBool_tPKDh13svfloat16x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_f16(svbool_t pg, const float16_t *base, svfloat16x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_f16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z16test_svst3q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_bf16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_f32u10__SVBool_tPKf13svfloat32x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_f32(svbool_t pg, const float32_t *base, svfloat32x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_f32,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_f64u10__SVBool_tPKd13svfloat64x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_f64(svbool_t pg, const float64_t *base, svfloat64x3_t zt)
{
  SVE_ACLE_FUNC(svst3q,,_f64,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst3q_vnum_u8u10__SVBool_tPKhl11svuint8x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_u8,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst3q_vnum_s8u10__SVBool_tPKal10svint8x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_s8,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u16u10__SVBool_tPKtl12svuint16x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_u16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s16u10__SVBool_tPKsl11svint16x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_s16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u32u10__SVBool_tPKjl12svuint32x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_u32,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s32u10__SVBool_tPKil11svint32x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_s32,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u64u10__SVBool_tPKml12svuint64x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_u64,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s64u10__SVBool_tPKll11svint64x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_s64,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f16u10__SVBool_tPKDhl13svfloat16x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_f16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z21test_svst3q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_bf16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f32u10__SVBool_tPKfl13svfloat32x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_f32,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst3q_vnum_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f64u10__SVBool_tPKdl13svfloat64x3_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x3_t zt)
{
  SVE_ACLE_FUNC(svst3q_vnum,,_f64,)(pg, base, vnum, zt);
}

//
// ST4Q
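// Each svst4q form lowers to a single @llvm.aarch64.sve.st4q.* call, with the
// four parts of the tuple passed as separate scalable-vector operands (a
// structured store of 128-bit, i.e. quadword, elements).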
// CHECK-LABEL: @test_svst4q_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst4q_u8u10__SVBool_tPKh11svuint8x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_u8(svbool_t pg, const uint8_t *base, svuint8x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_u8,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst4q_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst4q_s8u10__SVBool_tPKa10svint8x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_s8(svbool_t pg, const int8_t *base, svint8x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_s8,)(pg, base, zt);
}
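
// For element types wider than one byte, the svbool_t predicate is first
// narrowed with @llvm.aarch64.sve.convert.from.svbool to the matching
// <vscale x N x i1> type, as the TMP0 lines below check.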
// CHECK-LABEL: @test_svst4q_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst4q_u16u10__SVBool_tPKt12svuint16x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_u16(svbool_t pg, const uint16_t *base, svuint16x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_u16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst4q_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst4q_s16u10__SVBool_tPKs11svint16x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_s16(svbool_t pg, const int16_t *base, svint16x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_s16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst4q_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst4q_u32u10__SVBool_tPKj12svuint32x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_u32(svbool_t pg, const uint32_t *base, svuint32x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_u32,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst4q_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst4q_s32u10__SVBool_tPKi11svint32x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_s32(svbool_t pg, const int32_t *base, svint32x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_s32,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst4q_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst4q_u64u10__SVBool_tPKm12svuint64x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_u64(svbool_t pg, const uint64_t *base, svuint64x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_u64,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst4q_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst4q_s64u10__SVBool_tPKl11svint64x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_s64(svbool_t pg, const int64_t *base, svint64x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_s64,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst4q_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst4q_f16u10__SVBool_tPKDh13svfloat16x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_f16(svbool_t pg, const float16_t *base, svfloat16x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_f16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst4q_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z16test_svst4q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_bf16,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst4q_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst4q_f32u10__SVBool_tPKf13svfloat32x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_f32(svbool_t pg, const float32_t *base, svfloat32x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_f32,)(pg, base, zt);
}

// CHECK-LABEL: @test_svst4q_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst4q_f64u10__SVBool_tPKd13svfloat64x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_f64(svbool_t pg, const float64_t *base, svfloat64x4_t zt)
{
  SVE_ACLE_FUNC(svst4q,,_f64,)(pg, base, zt);
}
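
// The _vnum forms below advance the base by whole vector registers: the IR
// computes the byte offset as vnum * (vscale * 16) and applies it with a
// byte-typed getelementptr before the structured store.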

// CHECK-LABEL: @test_svst4q_vnum_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst4q_vnum_u8u10__SVBool_tPKhl11svuint8x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_u8,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst4q_vnum_s8u10__SVBool_tPKal10svint8x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> [[ZT_COERCE0:%.*]], <vscale x 16 x i8> [[ZT_COERCE1:%.*]], <vscale x 16 x i8> [[ZT_COERCE2:%.*]], <vscale x 16 x i8> [[ZT_COERCE3:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_s8,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u16u10__SVBool_tPKtl12svuint16x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_u16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s16u10__SVBool_tPKsl11svint16x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> [[ZT_COERCE0:%.*]], <vscale x 8 x i16> [[ZT_COERCE1:%.*]], <vscale x 8 x i16> [[ZT_COERCE2:%.*]], <vscale x 8 x i16> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_s16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u32u10__SVBool_tPKjl12svuint32x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_u32,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s32u10__SVBool_tPKil11svint32x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> [[ZT_COERCE0:%.*]], <vscale x 4 x i32> [[ZT_COERCE1:%.*]], <vscale x 4 x i32> [[ZT_COERCE2:%.*]], <vscale x 4 x i32> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_s32,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u64u10__SVBool_tPKml12svuint64x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_u64,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s64u10__SVBool_tPKll11svint64x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> [[ZT_COERCE0:%.*]], <vscale x 2 x i64> [[ZT_COERCE1:%.*]], <vscale x 2 x i64> [[ZT_COERCE2:%.*]], <vscale x 2 x i64> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_s64,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f16u10__SVBool_tPKDhl13svfloat16x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> [[ZT_COERCE0:%.*]], <vscale x 8 x half> [[ZT_COERCE1:%.*]], <vscale x 8 x half> [[ZT_COERCE2:%.*]], <vscale x 8 x half> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_f16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z21test_svst4q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> [[ZT_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZT_COERCE3:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_bf16,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f32u10__SVBool_tPKfl13svfloat32x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> [[ZT_COERCE0:%.*]], <vscale x 4 x float> [[ZT_COERCE1:%.*]], <vscale x 4 x float> [[ZT_COERCE2:%.*]], <vscale x 4 x float> [[ZT_COERCE3:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_f32,)(pg, base, vnum, zt);
}

// CHECK-LABEL: @test_svst4q_vnum_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f64u10__SVBool_tPKdl13svfloat64x4_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT:    [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> [[ZT_COERCE0:%.*]], <vscale x 2 x double> [[ZT_COERCE1:%.*]], <vscale x 2 x double> [[ZT_COERCE2:%.*]], <vscale x 2 x double> [[ZT_COERCE3:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x4_t zt)
{
  SVE_ACLE_FUNC(svst4q_vnum,,_f64,)(pg, base, vnum, zt);
}

// Scatter for 128 bits
// vector base + scalar offset
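// Each active element of DATA is stored to the address formed by the matching
// 64-bit element of BASE plus the scalar byte OFFSET; the predicate is first
// converted to <vscale x 1 x i1> and the call lowers to
// @llvm.aarch64.sve.st1q.scatter.scalar.offset.*.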
// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_u64u10__SVBool_tu12__SVUint64_tlS0_(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_u64(svbool_t pg, svuint64_t base, int64_t offset, svuint64_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _u64)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_s64(svbool_t pg, svuint64_t base, int64_t offset, svint64_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _s64)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_u32u10__SVBool_tu12__SVUint64_tlu12__SVUint32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_u32(svbool_t pg, svuint64_t base, int64_t offset, svuint32_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _u32)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_s32u10__SVBool_tu12__SVUint64_tlu11__SVInt32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_s32(svbool_t pg, svuint64_t base, int64_t offset, svint32_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _s32)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_u16u10__SVBool_tu12__SVUint64_tlu12__SVUint16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_u16(svbool_t pg, svuint64_t base, int64_t offset, svuint16_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _u16)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_s16u10__SVBool_tu12__SVUint64_tlu11__SVInt16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_s16(svbool_t pg, svuint64_t base, int64_t offset, svint16_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _s16)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_offset_u8u10__SVBool_tu12__SVUint64_tlu11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_u8(svbool_t pg, svuint64_t base, int64_t offset, svuint8_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _u8)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_offset_s8u10__SVBool_tu12__SVUint64_tlu10__SVInt8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_s8(svbool_t pg, svuint64_t base, int64_t offset, svint8_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _s8)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_f64u10__SVBool_tu12__SVUint64_tlu13__SVFloat64_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_f64(svbool_t pg, svuint64_t base, int64_t offset, svfloat64_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _f64)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_f32u10__SVBool_tu12__SVUint64_tlu13__SVFloat32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_f32(svbool_t pg, svuint64_t base, int64_t offset, svfloat32_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _f32)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_f16u10__SVBool_tu12__SVUint64_tlu13__SVFloat16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_f16(svbool_t pg, svuint64_t base, int64_t offset, svfloat16_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _f16)(pg, base, offset, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z39test_svst1q_scatter_u64base_offset_bf16u10__SVBool_tu12__SVUint64_tlu14__SVBfloat16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[OFFSET:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_offset_bf16(svbool_t pg, svuint64_t base, int64_t offset, svbfloat16_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _bf16)(pg, base, offset, data);
}

// Vector Base and no Offset
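// These forms reuse the same scalar-offset intrinsic with an immediate offset
// of 0, as the trailing "i64 0" operand in the calls below shows.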
// CHECK-LABEL: @test_svst1q_scatter_u64base_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_u64u10__SVBool_tu12__SVUint64_tS0_(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_u64(svbool_t pg, svuint64_t base, svuint64_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _u64)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_s64u10__SVBool_tu12__SVUint64_tu11__SVInt64_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_s64(svbool_t pg, svuint64_t base, svint64_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _s64)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_u32u10__SVBool_tu12__SVUint64_tu12__SVUint32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_u32(svbool_t pg, svuint64_t base, svuint32_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _u32)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_s32u10__SVBool_tu12__SVUint64_tu11__SVInt32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_s32(svbool_t pg, svuint64_t base, svint32_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _s32)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_u16u10__SVBool_tu12__SVUint64_tu12__SVUint16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_u16(svbool_t pg, svuint64_t base, svuint16_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _u16)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_s16u10__SVBool_tu12__SVUint64_tu11__SVInt16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_s16(svbool_t pg, svuint64_t base, svint16_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _s16)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z30test_svst1q_scatter_u64base_u8u10__SVBool_tu12__SVUint64_tu11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_u8(svbool_t pg, svuint64_t base, svuint8_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _u8)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z30test_svst1q_scatter_u64base_s8u10__SVBool_tu12__SVUint64_tu10__SVInt8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_s8(svbool_t pg, svuint64_t base, svint8_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _s8)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_f64u10__SVBool_tu12__SVUint64_tu13__SVFloat64_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_f64(svbool_t pg, svuint64_t base, svfloat64_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _f64)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_f32u10__SVBool_tu12__SVUint64_tu13__SVFloat32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_f32(svbool_t pg, svuint64_t base, svfloat32_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _f32)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_f16u10__SVBool_tu12__SVUint64_tu13__SVFloat16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_f16(svbool_t pg, svuint64_t base, svfloat16_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _f16)(pg, base, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64base_bf16u10__SVBool_tu12__SVUint64_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 0)
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_bf16(svbool_t pg, svuint64_t base, svbfloat16_t data)
{
  SVE_ACLE_FUNC(svst1q_scatter, _u64base,, _bf16)(pg, base, data);
}

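// Pointer Base and Vector Index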
// CHECK-LABEL: @test_svst1q_scatter_u64index_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_s16u10__SVBool_tPsu12__SVUint64_tu11__SVInt16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64index_s16(svbool_t pg, int16_t *base, svuint64_t idx, svint16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _s16)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64index_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_u16u10__SVBool_tPtu12__SVUint64_tu12__SVUint16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64index_u16(svbool_t pg, uint16_t *base, svuint64_t idx, svuint16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _u16)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64index_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_s32u10__SVBool_tPiu12__SVUint64_tu11__SVInt32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64index_s32(svbool_t pg, int32_t *base, svuint64_t idx, svint32_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _s32)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64index_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_u32u10__SVBool_tPju12__SVUint64_tu12__SVUint32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64index_u32(svbool_t pg, uint32_t *base, svuint64_t idx, svuint32_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _u32)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64index_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_s64u10__SVBool_tPlu12__SVUint64_tu11__SVInt64_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64index_s64(svbool_t pg, int64_t *base, svuint64_t idx, svint64_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _s64)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64index_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_u64u10__SVBool_tPmu12__SVUint64_tS1_(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64index_u64(svbool_t pg, uint64_t *base, svuint64_t idx, svuint64_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _u64)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64index_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64index_bf16u10__SVBool_tPu6__bf16u12__SVUint64_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64index_bf16(svbool_t pg, bfloat16_t *base, svuint64_t idx, svbfloat16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _bf16)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64index_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_f16u10__SVBool_tPDhu12__SVUint64_tu13__SVFloat16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64index_f16(svbool_t pg, float16_t *base, svuint64_t idx, svfloat16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _f16)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64index_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_f32u10__SVBool_tPfu12__SVUint64_tu13__SVFloat32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64index_f32(svbool_t pg, float32_t *base, svuint64_t idx, svfloat32_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _f32)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64index_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_f64u10__SVBool_tPdu12__SVUint64_tu13__SVFloat64_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64index_f64(svbool_t pg, float64_t *base, svuint64_t idx, svfloat64_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _f64)(pg, base, idx, data);
}

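// Vector Base and Scalar Index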
// CHECK-LABEL: @test_svst1q_scatter_u64base_index_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_s16u10__SVBool_tu12__SVUint64_tlu11__SVInt16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_index_s16(svbool_t pg, svuint64_t base, int64_t idx, svint16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_s16)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_index_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_u16u10__SVBool_tu12__SVUint64_tlu12__SVUint16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_index_u16(svbool_t pg, svuint64_t base, int64_t idx, svuint16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_u16)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_index_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_s32u10__SVBool_tu12__SVUint64_tlu11__SVInt32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_index_s32(svbool_t pg, svuint64_t base, int64_t idx, svint32_t data) {
  SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_s32)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_index_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_u32u10__SVBool_tu12__SVUint64_tlu12__SVUint32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_index_u32(svbool_t pg, svuint64_t base, int64_t idx, svuint32_t data) {
  SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_u32)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_index_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_index_s64(svbool_t pg, svuint64_t base, int64_t idx, svint64_t data) {
  SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_s64)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_index_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_u64u10__SVBool_tu12__SVUint64_tlS0_(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_index_u64(svbool_t pg, svuint64_t base, int64_t idx, svuint64_t data) {
  SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_u64)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_index_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_index_bf16u10__SVBool_tu12__SVUint64_tlu14__SVBfloat16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_index_bf16(svbool_t pg, svuint64_t base, int64_t idx, svbfloat16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_bf16)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_index_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_f16u10__SVBool_tu12__SVUint64_tlu13__SVFloat16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_index_f16(svbool_t pg, svuint64_t base, int64_t idx, svfloat16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_f16)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_index_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_f32u10__SVBool_tu12__SVUint64_tlu13__SVFloat32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_index_f32(svbool_t pg, svuint64_t base, int64_t idx, svfloat32_t data) {
  SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_f32)(pg, base, idx, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64base_index_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_f64u10__SVBool_tu12__SVUint64_tlu13__SVFloat64_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64base_index_f64(svbool_t pg, svuint64_t base, int64_t idx, svfloat64_t data) {
  SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_f64)(pg, base, idx, data);
}

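// Pointer Base and Vector Offset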
// CHECK-LABEL: @test_svst1q_scatter_u64offset_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64offset_s8u10__SVBool_tPau12__SVUint64_tu10__SVInt8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_s8(svbool_t pg, int8_t *base, svuint64_t off, svint8_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_s8)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64offset_u8u10__SVBool_tPhu12__SVUint64_tu11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_u8(svbool_t pg, uint8_t *base, svuint64_t off, svuint8_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_u8)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_s16u10__SVBool_tPsu12__SVUint64_tu11__SVInt16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_s16(svbool_t pg, int16_t *base, svuint64_t off, svint16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_s16)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_u16u10__SVBool_tPtu12__SVUint64_tu12__SVUint16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_u16(svbool_t pg, uint16_t *base, svuint64_t off, svuint16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_u16)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_s32u10__SVBool_tPiu12__SVUint64_tu11__SVInt32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_s32(svbool_t pg, int32_t *base, svuint64_t off, svint32_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_s32)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_u32u10__SVBool_tPju12__SVUint64_tu12__SVUint32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_u32(svbool_t pg, uint32_t *base, svuint64_t off, svuint32_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_u32)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_s64u10__SVBool_tPlu12__SVUint64_tu11__SVInt64_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_s64(svbool_t pg, int64_t *base, svuint64_t off, svint64_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_s64)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_u64u10__SVBool_tPmu12__SVUint64_tS1_(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_u64(svbool_t pg, uint64_t *base, svuint64_t off, svuint64_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_u64)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z34test_svst1q_scatter_u64offset_bf16u10__SVBool_tPu6__bf16u12__SVUint64_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_bf16(svbool_t pg, bfloat16_t *base, svuint64_t off, svbfloat16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_bf16)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_f16u10__SVBool_tPDhu12__SVUint64_tu13__SVFloat16_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_f16(svbool_t pg, float16_t *base, svuint64_t off, svfloat16_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_f16)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_f32u10__SVBool_tPfu12__SVUint64_tu13__SVFloat32_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_f32(svbool_t pg, float32_t *base, svuint64_t off, svfloat32_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_f32)(pg, base, off, data);
}

// CHECK-LABEL: @test_svst1q_scatter_u64offset_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CHECK-NEXT:    ret void
//
// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_f64u10__SVBool_tPdu12__SVUint64_tu13__SVFloat64_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT:    ret void
//
void test_svst1q_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t off, svfloat64_t data) {
  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_f64)(pg, base, off, data);
}