llvm/clang/test/CodeGenCXX/aarch64-sve-vector-init.cpp

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -emit-llvm -o - %s | FileCheck %s
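
// This file checks two things for AArch64 SVE types: that value
// initialization ("{}") of locals of vector, predicate, tuple, and svcount
// type lowers to stores of zeroinitializer, and that copy initialization
// from a same-typed value lowers to a plain load/store copy.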

// CHECK-LABEL: define dso_local void @_Z11test_localsv
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[S8:%.*]] = alloca <vscale x 16 x i8>, align 16
// CHECK-NEXT:    [[S16:%.*]] = alloca <vscale x 8 x i16>, align 16
// CHECK-NEXT:    [[S32:%.*]] = alloca <vscale x 4 x i32>, align 16
// CHECK-NEXT:    [[S64:%.*]] = alloca <vscale x 2 x i64>, align 16
// CHECK-NEXT:    [[U8:%.*]] = alloca <vscale x 16 x i8>, align 16
// CHECK-NEXT:    [[U16:%.*]] = alloca <vscale x 8 x i16>, align 16
// CHECK-NEXT:    [[U32:%.*]] = alloca <vscale x 4 x i32>, align 16
// CHECK-NEXT:    [[U64:%.*]] = alloca <vscale x 2 x i64>, align 16
// CHECK-NEXT:    [[F16:%.*]] = alloca <vscale x 8 x half>, align 16
// CHECK-NEXT:    [[F32:%.*]] = alloca <vscale x 4 x float>, align 16
// CHECK-NEXT:    [[F64:%.*]] = alloca <vscale x 2 x double>, align 16
// CHECK-NEXT:    [[BF16:%.*]] = alloca <vscale x 8 x bfloat>, align 16
// CHECK-NEXT:    [[S8X2:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[S16X2:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[S32X2:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[S64X2:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[U8X2:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[U16X2:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[U32X2:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[U64X2:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[F16X2:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[F32X2:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[F64X2:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[BF16X2:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[S8X3:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[S16X3:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[S32X3:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[S64X3:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[U8X3:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[U16X3:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[U32X3:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[U64X3:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[F16X3:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[F32X3:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[F64X3:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[BF16X3:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[S8X4:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[S16X4:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[S32X4:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[S64X4:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[U8X4:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[U16X4:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[U32X4:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[U64X4:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[F16X4:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[F32X4:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[F64X4:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[BF16X4:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[B8:%.*]] = alloca <vscale x 16 x i1>, align 2
// CHECK-NEXT:    [[B8X2:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT:    [[B8X4:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT:    [[CNT:%.*]] = alloca target("aarch64.svcount"), align 2
// CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[S8]], align 16
// CHECK-NEXT:    store <vscale x 8 x i16> zeroinitializer, ptr [[S16]], align 16
// CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[S32]], align 16
// CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[S64]], align 16
// CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[U8]], align 16
// CHECK-NEXT:    store <vscale x 8 x i16> zeroinitializer, ptr [[U16]], align 16
// CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[U32]], align 16
// CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[U64]], align 16
// CHECK-NEXT:    store <vscale x 8 x half> zeroinitializer, ptr [[F16]], align 16
// CHECK-NEXT:    store <vscale x 4 x float> zeroinitializer, ptr [[F32]], align 16
// CHECK-NEXT:    store <vscale x 2 x double> zeroinitializer, ptr [[F64]], align 16
// CHECK-NEXT:    store <vscale x 8 x bfloat> zeroinitializer, ptr [[BF16]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[S8X2]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr [[S16X2]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr [[S32X2]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr [[S64X2]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[U8X2]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr [[U16X2]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr [[U32X2]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr [[U64X2]], align 16
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half> } zeroinitializer, ptr [[F16X2]], align 16
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float> } zeroinitializer, ptr [[F32X2]], align 16
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double> } zeroinitializer, ptr [[F64X2]], align 16
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } zeroinitializer, ptr [[BF16X2]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[S8X3]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr [[S16X3]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr [[S32X3]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr [[S64X3]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[U8X3]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr [[U16X3]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr [[U32X3]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr [[U64X3]], align 16
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } zeroinitializer, ptr [[F16X3]], align 16
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } zeroinitializer, ptr [[F32X3]], align 16
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } zeroinitializer, ptr [[F64X3]], align 16
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } zeroinitializer, ptr [[BF16X3]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[S8X4]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr [[S16X4]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr [[S32X4]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr [[S64X4]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[U8X4]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr [[U16X4]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr [[U32X4]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr [[U64X4]], align 16
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } zeroinitializer, ptr [[F16X4]], align 16
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } zeroinitializer, ptr [[F32X4]], align 16
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } zeroinitializer, ptr [[F64X4]], align 16
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } zeroinitializer, ptr [[BF16X4]], align 16
// CHECK-NEXT:    store <vscale x 16 x i1> zeroinitializer, ptr [[B8]], align 2
// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[B8X2]], align 2
// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[B8X4]], align 2
// CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr [[CNT]], align 2
// CHECK-NEXT:    ret void
//
void test_locals(void) {
  __SVInt8_t s8{};
  __SVInt16_t s16{};
  __SVInt32_t s32{};
  __SVInt64_t s64{};
  __SVUint8_t u8{};
  __SVUint16_t u16{};
  __SVUint32_t u32{};
  __SVUint64_t u64{};
  __SVFloat16_t f16{};
  __SVFloat32_t f32{};
  __SVFloat64_t f64{};
  __SVBfloat16_t bf16{};

  __clang_svint8x2_t s8x2{};
  __clang_svint16x2_t s16x2{};
  __clang_svint32x2_t s32x2{};
  __clang_svint64x2_t s64x2{};
  __clang_svuint8x2_t u8x2{};
  __clang_svuint16x2_t u16x2{};
  __clang_svuint32x2_t u32x2{};
  __clang_svuint64x2_t u64x2{};
  __clang_svfloat16x2_t f16x2{};
  __clang_svfloat32x2_t f32x2{};
  __clang_svfloat64x2_t f64x2{};
  __clang_svbfloat16x2_t bf16x2{};

  __clang_svint8x3_t s8x3{};
  __clang_svint16x3_t s16x3{};
  __clang_svint32x3_t s32x3{};
  __clang_svint64x3_t s64x3{};
  __clang_svuint8x3_t u8x3{};
  __clang_svuint16x3_t u16x3{};
  __clang_svuint32x3_t u32x3{};
  __clang_svuint64x3_t u64x3{};
  __clang_svfloat16x3_t f16x3{};
  __clang_svfloat32x3_t f32x3{};
  __clang_svfloat64x3_t f64x3{};
  __clang_svbfloat16x3_t bf16x3{};

  __clang_svint8x4_t s8x4{};
  __clang_svint16x4_t s16x4{};
  __clang_svint32x4_t s32x4{};
  __clang_svint64x4_t s64x4{};
  __clang_svuint8x4_t u8x4{};
  __clang_svuint16x4_t u16x4{};
  __clang_svuint32x4_t u32x4{};
  __clang_svuint64x4_t u64x4{};
  __clang_svfloat16x4_t f16x4{};
  __clang_svfloat32x4_t f32x4{};
  __clang_svfloat64x4_t f64x4{};
  __clang_svbfloat16x4_t bf16x4{};

  __SVBool_t b8{};
  __clang_svboolx2_t b8x2{};
  __clang_svboolx4_t b8x4{};

  __SVCount_t cnt{};
}

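// Each test_copy_* function below copy-initializes a local from its
// parameter; for a single SVE vector this should reduce to one load and one
// store of the scalable vector.
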
// CHECK-LABEL: define dso_local void @_Z12test_copy_s8u10__SVInt8_t
// CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 16 x i8>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 16 x i8>, align 16
// CHECK-NEXT:    store <vscale x 16 x i8> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 16 x i8> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s8(__SVInt8_t a) {
  __SVInt8_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_s16u11__SVInt16_t
// CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 8 x i16>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 8 x i16>, align 16
// CHECK-NEXT:    store <vscale x 8 x i16> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x i16>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 8 x i16> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s16(__SVInt16_t a) {
  __SVInt16_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_s32u11__SVInt32_t
// CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 4 x i32>, align 16
// CHECK-NEXT:    store <vscale x 4 x i32> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i32>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 4 x i32> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s32(__SVInt32_t a) {
  __SVInt32_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_s64u11__SVInt64_t
// CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 2 x i64>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 2 x i64>, align 16
// CHECK-NEXT:    store <vscale x 2 x i64> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 2 x i64>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 2 x i64> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s64(__SVInt64_t a) {
  __SVInt64_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z12test_copy_u8u11__SVUint8_t
// CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 16 x i8>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 16 x i8>, align 16
// CHECK-NEXT:    store <vscale x 16 x i8> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 16 x i8> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u8(__SVUint8_t a) {
  __SVUint8_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_u16u12__SVUint16_t
// CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 8 x i16>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 8 x i16>, align 16
// CHECK-NEXT:    store <vscale x 8 x i16> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x i16>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 8 x i16> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u16(__SVUint16_t a) {
  __SVUint16_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_u32u12__SVUint32_t
// CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 4 x i32>, align 16
// CHECK-NEXT:    store <vscale x 4 x i32> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i32>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 4 x i32> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u32(__SVUint32_t a) {
  __SVUint32_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_u64u12__SVUint64_t
// CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 2 x i64>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 2 x i64>, align 16
// CHECK-NEXT:    store <vscale x 2 x i64> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 2 x i64>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 2 x i64> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u64(__SVUint64_t a) {
  __SVUint64_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_f16u13__SVFloat16_t
// CHECK-SAME: (<vscale x 8 x half> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 8 x half>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 8 x half>, align 16
// CHECK-NEXT:    store <vscale x 8 x half> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x half>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 8 x half> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f16(__SVFloat16_t a) {
  __SVFloat16_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_f32u13__SVFloat32_t
// CHECK-SAME: (<vscale x 4 x float> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 4 x float>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 4 x float>, align 16
// CHECK-NEXT:    store <vscale x 4 x float> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x float>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 4 x float> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f32(__SVFloat32_t a) {
  __SVFloat32_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_f64u13__SVFloat64_t
// CHECK-SAME: (<vscale x 2 x double> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 2 x double>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 2 x double>, align 16
// CHECK-NEXT:    store <vscale x 2 x double> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 2 x double>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 2 x double> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f64(__SVFloat64_t a) {
  __SVFloat64_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z14test_copy_bf16u14__SVBfloat16_t
// CHECK-SAME: (<vscale x 8 x bfloat> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 8 x bfloat>, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 8 x bfloat>, align 16
// CHECK-NEXT:    store <vscale x 8 x bfloat> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x bfloat>, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store <vscale x 8 x bfloat> [[TMP0]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_bf16(__SVBfloat16_t a) {
  __SVBfloat16_t b{a};
}

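// SVE tuple types are passed as one coerced scalable-vector argument per
// element; the checks below verify that the pieces are reassembled with
// insertvalue before being stored and copied.
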
// CHECK-LABEL: define dso_local void @_Z14test_copy_s8x210svint8x2_t
// CHECK-SAME: (<vscale x 16 x i8> [[A_COERCE0:%.*]], <vscale x 16 x i8> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s8x2(__clang_svint8x2_t a) {
  __clang_svint8x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s16x211svint16x2_t
// CHECK-SAME: (<vscale x 8 x i16> [[A_COERCE0:%.*]], <vscale x 8 x i16> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s16x2(__clang_svint16x2_t a) {
  __clang_svint16x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s32x211svint32x2_t
// CHECK-SAME: (<vscale x 4 x i32> [[A_COERCE0:%.*]], <vscale x 4 x i32> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s32x2(__clang_svint32x2_t a) {
  __clang_svint32x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s64x211svint64x2_t
// CHECK-SAME: (<vscale x 2 x i64> [[A_COERCE0:%.*]], <vscale x 2 x i64> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s64x2(__clang_svint64x2_t a) {
  __clang_svint64x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z14test_copy_u8x211svuint8x2_t
// CHECK-SAME: (<vscale x 16 x i8> [[A_COERCE0:%.*]], <vscale x 16 x i8> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u8x2(__clang_svuint8x2_t a) {
  __clang_svuint8x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_u16x212svuint16x2_t
// CHECK-SAME: (<vscale x 8 x i16> [[A_COERCE0:%.*]], <vscale x 8 x i16> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u16x2(__clang_svuint16x2_t a) {
  __clang_svuint16x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_u32x212svuint32x2_t
// CHECK-SAME: (<vscale x 4 x i32> [[A_COERCE0:%.*]], <vscale x 4 x i32> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u32x2(__clang_svuint32x2_t a) {
  __clang_svuint32x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_u64x212svuint64x2_t
// CHECK-SAME: (<vscale x 2 x i64> [[A_COERCE0:%.*]], <vscale x 2 x i64> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u64x2(__clang_svuint64x2_t a) {
  __clang_svuint64x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_f16x213svfloat16x2_t
// CHECK-SAME: (<vscale x 8 x half> [[A_COERCE0:%.*]], <vscale x 8 x half> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f16x2(__clang_svfloat16x2_t a) {
  __clang_svfloat16x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_f32x213svfloat32x2_t
// CHECK-SAME: (<vscale x 4 x float> [[A_COERCE0:%.*]], <vscale x 4 x float> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], <vscale x 4 x float> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f32x2(__clang_svfloat32x2_t a) {
  __clang_svfloat32x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_f64x213svfloat64x2_t
// CHECK-SAME: (<vscale x 2 x double> [[A_COERCE0:%.*]], <vscale x 2 x double> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f64x2(__clang_svfloat64x2_t a) {
  __clang_svfloat64x2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z16test_copy_bf16x214svbfloat16x2_t
// CHECK-SAME: (<vscale x 8 x bfloat> [[A_COERCE0:%.*]], <vscale x 8 x bfloat> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_bf16x2(__clang_svbfloat16x2_t a) {
  __clang_svbfloat16x2_t b{a};
}

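// The three- and four-element tuple copies follow the same pattern as the
// two-element ones, with one additional coerced argument and insertvalue
// per extra element.
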
// CHECK-LABEL: define dso_local void @_Z14test_copy_s8x310svint8x3_t
// CHECK-SAME: (<vscale x 16 x i8> [[A_COERCE0:%.*]], <vscale x 16 x i8> [[A_COERCE1:%.*]], <vscale x 16 x i8> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s8x3(__clang_svint8x3_t a) {
  __clang_svint8x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s16x311svint16x3_t
// CHECK-SAME: (<vscale x 8 x i16> [[A_COERCE0:%.*]], <vscale x 8 x i16> [[A_COERCE1:%.*]], <vscale x 8 x i16> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s16x3(__clang_svint16x3_t a) {
  __clang_svint16x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s32x311svint32x3_t
// CHECK-SAME: (<vscale x 4 x i32> [[A_COERCE0:%.*]], <vscale x 4 x i32> [[A_COERCE1:%.*]], <vscale x 4 x i32> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s32x3(__clang_svint32x3_t a) {
  __clang_svint32x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s64x311svint64x3_t
// CHECK-SAME: (<vscale x 2 x i64> [[A_COERCE0:%.*]], <vscale x 2 x i64> [[A_COERCE1:%.*]], <vscale x 2 x i64> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s64x3(__clang_svint64x3_t a) {
  __clang_svint64x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z14test_copy_u8x311svuint8x3_t
// CHECK-SAME: (<vscale x 16 x i8> [[A_COERCE0:%.*]], <vscale x 16 x i8> [[A_COERCE1:%.*]], <vscale x 16 x i8> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u8x3(__clang_svuint8x3_t a) {
  __clang_svuint8x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_u16x312svuint16x3_t
// CHECK-SAME: (<vscale x 8 x i16> [[A_COERCE0:%.*]], <vscale x 8 x i16> [[A_COERCE1:%.*]], <vscale x 8 x i16> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u16x3(__clang_svuint16x3_t a) {
  __clang_svuint16x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_u32x312svuint32x3_t
// CHECK-SAME: (<vscale x 4 x i32> [[A_COERCE0:%.*]], <vscale x 4 x i32> [[A_COERCE1:%.*]], <vscale x 4 x i32> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u32x3(__clang_svuint32x3_t a) {
  __clang_svuint32x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_u64x312svuint64x3_t
// CHECK-SAME: (<vscale x 2 x i64> [[A_COERCE0:%.*]], <vscale x 2 x i64> [[A_COERCE1:%.*]], <vscale x 2 x i64> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u64x3(__clang_svuint64x3_t a) {
  __clang_svuint64x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_f16x313svfloat16x3_t
// CHECK-SAME: (<vscale x 8 x half> [[A_COERCE0:%.*]], <vscale x 8 x half> [[A_COERCE1:%.*]], <vscale x 8 x half> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], <vscale x 8 x half> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f16x3(__clang_svfloat16x3_t a) {
  __clang_svfloat16x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_f32x313svfloat32x3_t
// CHECK-SAME: (<vscale x 4 x float> [[A_COERCE0:%.*]], <vscale x 4 x float> [[A_COERCE1:%.*]], <vscale x 4 x float> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], <vscale x 4 x float> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], <vscale x 4 x float> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f32x3(__clang_svfloat32x3_t a) {
  __clang_svfloat32x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_f64x313svfloat64x3_t
// CHECK-SAME: (<vscale x 2 x double> [[A_COERCE0:%.*]], <vscale x 2 x double> [[A_COERCE1:%.*]], <vscale x 2 x double> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], <vscale x 2 x double> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f64x3(__clang_svfloat64x3_t a) {
  __clang_svfloat64x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z16test_copy_bf16x314svbfloat16x3_t
// CHECK-SAME: (<vscale x 8 x bfloat> [[A_COERCE0:%.*]], <vscale x 8 x bfloat> [[A_COERCE1:%.*]], <vscale x 8 x bfloat> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[A_COERCE2]], 2
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_bf16x3(__clang_svbfloat16x3_t a) {
  __clang_svbfloat16x3_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z14test_copy_s8x410svint8x4_t
// CHECK-SAME: (<vscale x 16 x i8> [[A_COERCE0:%.*]], <vscale x 16 x i8> [[A_COERCE1:%.*]], <vscale x 16 x i8> [[A_COERCE2:%.*]], <vscale x 16 x i8> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s8x4(__clang_svint8x4_t a) {
  __clang_svint8x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s16x411svint16x4_t
// CHECK-SAME: (<vscale x 8 x i16> [[A_COERCE0:%.*]], <vscale x 8 x i16> [[A_COERCE1:%.*]], <vscale x 8 x i16> [[A_COERCE2:%.*]], <vscale x 8 x i16> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s16x4(__clang_svint16x4_t a) {
  __clang_svint16x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s32x411svint32x4_t
// CHECK-SAME: (<vscale x 4 x i32> [[A_COERCE0:%.*]], <vscale x 4 x i32> [[A_COERCE1:%.*]], <vscale x 4 x i32> [[A_COERCE2:%.*]], <vscale x 4 x i32> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s32x4(__clang_svint32x4_t a) {
  __clang_svint32x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s64x411svint64x4_t
// CHECK-SAME: (<vscale x 2 x i64> [[A_COERCE0:%.*]], <vscale x 2 x i64> [[A_COERCE1:%.*]], <vscale x 2 x i64> [[A_COERCE2:%.*]], <vscale x 2 x i64> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s64x4(__clang_svint64x4_t a) {
  __clang_svint64x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z14test_copy_u8x411svuint8x4_t
// CHECK-SAME: (<vscale x 16 x i8> [[A_COERCE0:%.*]], <vscale x 16 x i8> [[A_COERCE1:%.*]], <vscale x 16 x i8> [[A_COERCE2:%.*]], <vscale x 16 x i8> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u8x4(__clang_svuint8x4_t a) {
  __clang_svuint8x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_u16x412svuint16x4_t
// CHECK-SAME: (<vscale x 8 x i16> [[A_COERCE0:%.*]], <vscale x 8 x i16> [[A_COERCE1:%.*]], <vscale x 8 x i16> [[A_COERCE2:%.*]], <vscale x 8 x i16> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u16x4(__clang_svuint16x4_t a) {
  __clang_svuint16x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_u32x412svuint32x4_t
// CHECK-SAME: (<vscale x 4 x i32> [[A_COERCE0:%.*]], <vscale x 4 x i32> [[A_COERCE1:%.*]], <vscale x 4 x i32> [[A_COERCE2:%.*]], <vscale x 4 x i32> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u32x4(__clang_svuint32x4_t a) {
  __clang_svuint32x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_u64x412svuint64x4_t
// CHECK-SAME: (<vscale x 2 x i64> [[A_COERCE0:%.*]], <vscale x 2 x i64> [[A_COERCE1:%.*]], <vscale x 2 x i64> [[A_COERCE2:%.*]], <vscale x 2 x i64> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u64x4(__clang_svuint64x4_t a) {
  __clang_svuint64x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_f16x413svfloat16x4_t
// CHECK-SAME: (<vscale x 8 x half> [[A_COERCE0:%.*]], <vscale x 8 x half> [[A_COERCE1:%.*]], <vscale x 8 x half> [[A_COERCE2:%.*]], <vscale x 8 x half> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], <vscale x 8 x half> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], <vscale x 8 x half> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f16x4(__clang_svfloat16x4_t a) {
  __clang_svfloat16x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_f32x413svfloat32x4_t
// CHECK-SAME: (<vscale x 4 x float> [[A_COERCE0:%.*]], <vscale x 4 x float> [[A_COERCE1:%.*]], <vscale x 4 x float> [[A_COERCE2:%.*]], <vscale x 4 x float> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], <vscale x 4 x float> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], <vscale x 4 x float> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], <vscale x 4 x float> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f32x4(__clang_svfloat32x4_t a) {
  __clang_svfloat32x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_f64x413svfloat64x4_t
// CHECK-SAME: (<vscale x 2 x double> [[A_COERCE0:%.*]], <vscale x 2 x double> [[A_COERCE1:%.*]], <vscale x 2 x double> [[A_COERCE2:%.*]], <vscale x 2 x double> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], <vscale x 2 x double> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], <vscale x 2 x double> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f64x4(__clang_svfloat64x4_t a) {
  __clang_svfloat64x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z16test_copy_bf16x414svbfloat16x4_t
// CHECK-SAME: (<vscale x 8 x bfloat> [[A_COERCE0:%.*]], <vscale x 8 x bfloat> [[A_COERCE1:%.*]], <vscale x 8 x bfloat> [[A_COERCE2:%.*]], <vscale x 8 x bfloat> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], ptr [[A]], align 16
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[A]], align 16
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[A1]], ptr [[A_ADDR]], align 16
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[A_ADDR]], align 16
// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_bf16x4(__clang_svbfloat16x4_t a) {
  __clang_svbfloat16x4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z12test_copy_b8u10__SVBool_t
// CHECK-SAME: (<vscale x 16 x i1> [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 16 x i1>, align 2
// CHECK-NEXT:    store <vscale x 16 x i1> [[A]], ptr [[A_ADDR]], align 2
// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[A_ADDR]], align 2
// CHECK-NEXT:    store <vscale x 16 x i1> [[TMP0]], ptr [[B]], align 2
// CHECK-NEXT:    ret void
//
void test_copy_b8(__SVBool_t a) {
  __SVBool_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z14test_copy_b8x210svboolx2_t
// CHECK-SAME: (<vscale x 16 x i1> [[A_COERCE0:%.*]], <vscale x 16 x i1> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], <vscale x 16 x i1> [[A_COERCE1]], 1
// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], ptr [[A]], align 2
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[A]], align 2
// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[A1]], ptr [[A_ADDR]], align 2
// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[A_ADDR]], align 2
// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP2]], ptr [[B]], align 2
// CHECK-NEXT:    ret void
//
void test_copy_b8x2(__clang_svboolx2_t a) {
  __clang_svboolx2_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z14test_copy_b8x410svboolx4_t
// CHECK-SAME: (<vscale x 16 x i1> [[A_COERCE0:%.*]], <vscale x 16 x i1> [[A_COERCE1:%.*]], <vscale x 16 x i1> [[A_COERCE2:%.*]], <vscale x 16 x i1> [[A_COERCE3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[A_COERCE0]], 0
// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], <vscale x 16 x i1> [[A_COERCE1]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], <vscale x 16 x i1> [[A_COERCE2]], 2
// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP2]], <vscale x 16 x i1> [[A_COERCE3]], 3
// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], ptr [[A]], align 2
// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[A]], align 2
// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[A1]], ptr [[A_ADDR]], align 2
// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[A_ADDR]], align 2
// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP4]], ptr [[B]], align 2
// CHECK-NEXT:    ret void
//
void test_copy_b8x4(__clang_svboolx4_t a) {
  __clang_svboolx4_t b{a};
}

// CHECK-LABEL: define dso_local void @_Z13test_copy_cntu11__SVCount_t
// CHECK-SAME: (target("aarch64.svcount") [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca target("aarch64.svcount"), align 2
// CHECK-NEXT:    [[B:%.*]] = alloca target("aarch64.svcount"), align 2
// CHECK-NEXT:    store target("aarch64.svcount") [[A]], ptr [[A_ADDR]], align 2
// CHECK-NEXT:    [[TMP0:%.*]] = load target("aarch64.svcount"), ptr [[A_ADDR]], align 2
// CHECK-NEXT:    store target("aarch64.svcount") [[TMP0]], ptr [[B]], align 2
// CHECK-NEXT:    ret void
//
void test_copy_cnt(__SVCount_t a) {
  __SVCount_t b{a};
}