// llvm/clang/test/CodeGen/arm-v8.2a-neon-intrinsics-generic.c

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature -fullfp16 \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=sroa \
// RUN: | FileCheck %s --check-prefixes=CHECK-NOFP16
// RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=sroa \
// RUN: | FileCheck %s --check-prefixes=CHECK-FP16

// REQUIRES: arm-registered-target

#include <arm_neon.h>

// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vbsl_f16(
// CHECK-NOFP16-SAME: <4 x i16> noundef [[A:%.*]], <2 x i32> noundef [[B_COERCE:%.*]], <2 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[C_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> [[TMP8]])
// CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <2 x i32>
// CHECK-NOFP16-NEXT:    [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <2 x i32>
// CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP12]]
//
// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vbsl_f16(
// CHECK-FP16-SAME: <4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
// CHECK-FP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
// CHECK-FP16-NEXT:    [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK-FP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half>
// CHECK-FP16-NEXT:    ret <4 x half> [[TMP3]]
//
// vbsl_f16 (64-bit bit-select) must lower to @llvm.arm.neon.vbsl.v8i8 in both
// configurations; without +fullfp16 the float16x4_t arguments and result are
// ABI-coerced through <2 x i32>.
float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
  return vbsl_f16(a, b, c);
}

// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vbslq_f16(
// CHECK-NOFP16-SAME: <8 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B_COERCE:%.*]], <4 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[C_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
// CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <4 x i32>
// CHECK-NOFP16-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <4 x i32>
// CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP12]]
//
// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vbslq_f16(
// CHECK-FP16-SAME: <8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
// CHECK-FP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
// CHECK-FP16-NEXT:    [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK-FP16-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half>
// CHECK-FP16-NEXT:    ret <8 x half> [[TMP3]]
//
// 128-bit variant of the bit-select test: vbslq_f16 must lower to
// @llvm.arm.neon.vbsl.v16i8; without +fullfp16 the float16x8_t values are
// ABI-coerced through <4 x i32>.
float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
  return vbslq_f16(a, b, c);
}

// CHECK-NOFP16-LABEL: define dso_local void @test_vzip_f16(
// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
// CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK-NOFP16-NEXT:    [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK-NOFP16-NEXT:    store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]]
// CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK-NOFP16-NEXT:    [[VZIP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK-NOFP16-NEXT:    store <4 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META3]]
// CHECK-NOFP16-NEXT:    ret void
//
// CHECK-FP16-LABEL: define dso_local void @test_vzip_f16(
// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
// CHECK-FP16-NEXT:    [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK-FP16-NEXT:    store <4 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]]
// CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1
// CHECK-FP16-NEXT:    [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK-FP16-NEXT:    store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META3]]
// CHECK-FP16-NEXT:    ret void
//
// vzip_f16 must lower to two interleaving shufflevectors (lanes 0,4,1,5 and
// 2,6,3,7) stored into the sret float16x4x2_t; without +fullfp16 the shuffles
// operate on <4 x i16> instead of <4 x half>.
float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
  return vzip_f16(a, b);
}

// CHECK-NOFP16-LABEL: define dso_local void @test_vzipq_f16(
// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
// CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK-NOFP16-NEXT:    [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK-NOFP16-NEXT:    store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]]
// CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK-NOFP16-NEXT:    [[VZIP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK-NOFP16-NEXT:    store <8 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META6]]
// CHECK-NOFP16-NEXT:    ret void
//
// CHECK-FP16-LABEL: define dso_local void @test_vzipq_f16(
// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
// CHECK-FP16-NEXT:    [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK-FP16-NEXT:    store <8 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]]
// CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1
// CHECK-FP16-NEXT:    [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK-FP16-NEXT:    store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META6]]
// CHECK-FP16-NEXT:    ret void
//
// 128-bit variant: vzipq_f16 must lower to two interleaving shufflevectors
// stored into the sret float16x8x2_t; without +fullfp16 the shuffles operate
// on <8 x i16> and the arguments arrive coerced as <4 x i32>.
float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
  return vzipq_f16(a, b);
}

// CHECK-NOFP16-LABEL: define dso_local void @test_vuzp_f16(
// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
// CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK-NOFP16-NEXT:    [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK-NOFP16-NEXT:    store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]]
// CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK-NOFP16-NEXT:    [[VUZP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK-NOFP16-NEXT:    store <4 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META9]]
// CHECK-NOFP16-NEXT:    ret void
//
// CHECK-FP16-LABEL: define dso_local void @test_vuzp_f16(
// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
// CHECK-FP16-NEXT:    [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK-FP16-NEXT:    store <4 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]]
// CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1
// CHECK-FP16-NEXT:    [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK-FP16-NEXT:    store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META9]]
// CHECK-FP16-NEXT:    ret void
//
// vuzp_f16 must lower to two de-interleaving shufflevectors (even lanes
// 0,2,4,6 then odd lanes 1,3,5,7) stored into the sret float16x4x2_t;
// without +fullfp16 the shuffles operate on <4 x i16>.
float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
  return vuzp_f16(a, b);
}

// CHECK-NOFP16-LABEL: define dso_local void @test_vuzpq_f16(
// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
// CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK-NOFP16-NEXT:    [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK-NOFP16-NEXT:    store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]]
// CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK-NOFP16-NEXT:    [[VUZP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK-NOFP16-NEXT:    store <8 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META12]]
// CHECK-NOFP16-NEXT:    ret void
//
// CHECK-FP16-LABEL: define dso_local void @test_vuzpq_f16(
// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
// CHECK-FP16-NEXT:    [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK-FP16-NEXT:    store <8 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]]
// CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1
// CHECK-FP16-NEXT:    [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK-FP16-NEXT:    store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META12]]
// CHECK-FP16-NEXT:    ret void
//
// 128-bit variant: vuzpq_f16 must lower to two de-interleaving shufflevectors
// (even then odd lanes) stored into the sret float16x8x2_t; without +fullfp16
// the shuffles operate on <8 x i16>.
float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
  return vuzpq_f16(a, b);
}

// CHECK-NOFP16-LABEL: define dso_local void @test_vtrn_f16(
// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
// CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK-NOFP16-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK-NOFP16-NEXT:    store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]]
// CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK-NOFP16-NEXT:    [[VTRN3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK-NOFP16-NEXT:    store <4 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META15]]
// CHECK-NOFP16-NEXT:    ret void
//
// CHECK-FP16-LABEL: define dso_local void @test_vtrn_f16(
// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
// CHECK-FP16-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK-FP16-NEXT:    store <4 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]]
// CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1
// CHECK-FP16-NEXT:    [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK-FP16-NEXT:    store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META15]]
// CHECK-FP16-NEXT:    ret void
//
// vtrn_f16 must lower to two transposing shufflevectors (lanes 0,4,2,6 and
// 1,5,3,7) stored into the sret float16x4x2_t; without +fullfp16 the shuffles
// operate on <4 x i16>.
float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
  return vtrn_f16(a, b);
}

// CHECK-NOFP16-LABEL: define dso_local void @test_vtrnq_f16(
// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
// CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]])
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK-NOFP16-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK-NOFP16-NEXT:    store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]]
// CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK-NOFP16-NEXT:    [[VTRN3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK-NOFP16-NEXT:    store <8 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META18]]
// CHECK-NOFP16-NEXT:    ret void
//
// CHECK-FP16-LABEL: define dso_local void @test_vtrnq_f16(
// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]])
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
// CHECK-FP16-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK-FP16-NEXT:    store <8 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]]
// CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1
// CHECK-FP16-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK-FP16-NEXT:    store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META18]]
// CHECK-FP16-NEXT:    ret void
//
// 128-bit variant: vtrnq_f16 must lower to two transposing shufflevectors
// stored into the sret float16x8x2_t; without +fullfp16 the shuffles operate
// on <8 x i16>.
float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
  return vtrnq_f16(a, b);
}

// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vmov_n_f16(
// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
// CHECK-NOFP16-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
// CHECK-NOFP16-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
// CHECK-NOFP16-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32>
// CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP0]]
//
// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vmov_n_f16(
// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
// CHECK-FP16-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
// CHECK-FP16-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
// CHECK-FP16-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
// CHECK-FP16-NEXT:    ret <4 x half> [[VECINIT3]]
//
// vmov_n_f16 must splat the scalar into all 4 lanes via a chain of
// insertelement instructions; without +fullfp16 the result is returned
// bitcast to <2 x i32>.
float16x4_t test_vmov_n_f16(float16_t a) {
  return vmov_n_f16(a);
}

// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vmovq_n_f16(
// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
// CHECK-NOFP16-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
// CHECK-NOFP16-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
// CHECK-NOFP16-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
// CHECK-NOFP16-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
// CHECK-NOFP16-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
// CHECK-NOFP16-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
// CHECK-NOFP16-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32>
// CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP0]]
//
// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vmovq_n_f16(
// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
// CHECK-FP16-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
// CHECK-FP16-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
// CHECK-FP16-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
// CHECK-FP16-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
// CHECK-FP16-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
// CHECK-FP16-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
// CHECK-FP16-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
// CHECK-FP16-NEXT:    ret <8 x half> [[VECINIT7]]
//
// 128-bit variant: vmovq_n_f16 must splat the scalar into all 8 lanes via a
// chain of insertelement instructions; without +fullfp16 the result is
// returned bitcast to <4 x i32>.
float16x8_t test_vmovq_n_f16(float16_t a) {
  return vmovq_n_f16(a);
}

// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_n_f16(
// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
// CHECK-NOFP16-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
// CHECK-NOFP16-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
// CHECK-NOFP16-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32>
// CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP0]]
//
// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_n_f16(
// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
// CHECK-FP16-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
// CHECK-FP16-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
// CHECK-FP16-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
// CHECK-FP16-NEXT:    ret <4 x half> [[VECINIT3]]
//
// vdup_n_f16 must produce the same 4-lane insertelement splat as vmov_n_f16
// above; without +fullfp16 the result is returned bitcast to <2 x i32>.
float16x4_t test_vdup_n_f16(float16_t a) {
  return vdup_n_f16(a);
}

// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_n_f16(
// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
// CHECK-NOFP16-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
// CHECK-NOFP16-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
// CHECK-NOFP16-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
// CHECK-NOFP16-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
// CHECK-NOFP16-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
// CHECK-NOFP16-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
// CHECK-NOFP16-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32>
// CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP0]]
//
// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_n_f16(
// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
// CHECK-FP16-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
// CHECK-FP16-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
// CHECK-FP16-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
// CHECK-FP16-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
// CHECK-FP16-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
// CHECK-FP16-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
// CHECK-FP16-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
// CHECK-FP16-NEXT:    ret <8 x half> [[VECINIT7]]
//
// 128-bit variant: vdupq_n_f16 must produce the same 8-lane insertelement
// splat as vmovq_n_f16 above; without +fullfp16 the result is returned
// bitcast to <4 x i32>.
float16x8_t test_vdupq_n_f16(float16_t a) {
  return vdupq_n_f16(a);
}

// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_lane_f16(
// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK-NOFP16-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <2 x i32>
// CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP4]]
//
// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_lane_f16(
// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK-FP16-NEXT:    [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-FP16-NEXT:    ret <4 x half> [[LANE]]
//
// Duplicate lane 3 of a float16x4_t across all 4 lanes. Expected IR (per the
// autogenerated CHECK lines above) is a shufflevector with mask <3,3,3,3>;
// without +fullfp16 the vector is passed/returned coerced as <2 x i32>.
float16x4_t test_vdup_lane_f16(float16x4_t a) {
  return vdup_lane_f16(a, 3);
}

// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_lane_f16(
// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK-NOFP16-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <4 x i32>
// CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP4]]
//
// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_lane_f16(
// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK-FP16-NEXT:    [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK-FP16-NEXT:    ret <8 x half> [[LANE]]
//
// Duplicate lane 3 of a 64-bit float16x4_t into all 8 lanes of a 128-bit
// float16x8_t. Expected IR is a widening shufflevector with mask
// <3,3,3,3,3,3,3,3> (see the autogenerated CHECK lines above).
float16x8_t test_vdupq_lane_f16(float16x4_t a) {
  return vdupq_lane_f16(a, 3);
}

// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vext_f16(
// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK-NOFP16-NEXT:    [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
// CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[VEXT]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <2 x i32>
// CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP7]]
//
// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vext_f16(
// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
// CHECK-FP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK-FP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK-FP16-NEXT:    [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
// CHECK-FP16-NEXT:    ret <4 x half> [[VEXT]]
//
// Extract a 4-element vector starting at lane 2 of the concatenation {a, b}.
// Expected IR is a two-operand shufflevector with mask <2,3,4,5> (lanes 4-5
// index into b), as pinned by the autogenerated CHECK lines above.
float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
  return vext_f16(a, b, 2);
}

// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vextq_f16(
// CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <16 x i8>
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
// CHECK-NOFP16-NEXT:    [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
// CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <8 x i16> [[VEXT]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <4 x i32>
// CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP7]]
//
// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vextq_f16(
// CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
// CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
// CHECK-FP16-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// CHECK-FP16-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-FP16-NEXT:    [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
// CHECK-FP16-NEXT:    ret <8 x half> [[VEXT]]
//
// 128-bit variant of vext: extract 8 elements starting at lane 5 of the
// concatenation {a, b}. Expected IR is a shufflevector with mask
// <5,6,7,8,9,10,11,12> (lanes 8-12 index into b), per the CHECK lines above.
float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) {
  return vextq_f16(a, b, 5);
}

// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vrev64_f16(
// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to <2 x i32>
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <2 x i32>
// CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP5]]
//
// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vrev64_f16(
// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK-FP16-NEXT:    ret <4 x half> [[SHUFFLE_I]]
//
// Reverse the 4 half-precision elements within the single 64-bit doubleword.
// Expected IR is a shufflevector with mask <3,2,1,0>; the NOFP16 run adds
// <2 x i32> coercion bitcasts around it (see the CHECK lines above).
float16x4_t test_vrev64_f16(float16x4_t a) {
  return vrev64_f16(a);
}

// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vrev64q_f16(
// CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NOFP16-NEXT:  entry:
// CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
// CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[SHUFFLE_I]] to <4 x i32>
// CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
// CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <4 x i32>
// CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP5]]
//
// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vrev64q_f16(
// CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-FP16-NEXT:  entry:
// CHECK-FP16-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK-FP16-NEXT:    ret <8 x half> [[SHUFFLE_I]]
//
// Reverse the half-precision elements within each 64-bit doubleword of the
// 128-bit vector. Expected IR is a shufflevector with mask
// <3,2,1,0,7,6,5,4> (see the autogenerated CHECK lines above).
float16x8_t test_vrev64q_f16(float16x8_t a) {
  return vrev64q_f16(a);
}
//.
// CHECK-NOFP16: [[META3]] = !{[[META4:![0-9]+]]}
// CHECK-NOFP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"}
// CHECK-NOFP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"}
// CHECK-NOFP16: [[META6]] = !{[[META7:![0-9]+]]}
// CHECK-NOFP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"}
// CHECK-NOFP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"}
// CHECK-NOFP16: [[META9]] = !{[[META10:![0-9]+]]}
// CHECK-NOFP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"}
// CHECK-NOFP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"}
// CHECK-NOFP16: [[META12]] = !{[[META13:![0-9]+]]}
// CHECK-NOFP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"}
// CHECK-NOFP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"}
// CHECK-NOFP16: [[META15]] = !{[[META16:![0-9]+]]}
// CHECK-NOFP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"}
// CHECK-NOFP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"}
// CHECK-NOFP16: [[META18]] = !{[[META19:![0-9]+]]}
// CHECK-NOFP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"}
// CHECK-NOFP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"}
//.
// CHECK-FP16: [[META3]] = !{[[META4:![0-9]+]]}
// CHECK-FP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"}
// CHECK-FP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"}
// CHECK-FP16: [[META6]] = !{[[META7:![0-9]+]]}
// CHECK-FP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"}
// CHECK-FP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"}
// CHECK-FP16: [[META9]] = !{[[META10:![0-9]+]]}
// CHECK-FP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"}
// CHECK-FP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"}
// CHECK-FP16: [[META12]] = !{[[META13:![0-9]+]]}
// CHECK-FP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"}
// CHECK-FP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"}
// CHECK-FP16: [[META15]] = !{[[META16:![0-9]+]]}
// CHECK-FP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"}
// CHECK-FP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"}
// CHECK-FP16: [[META18]] = !{[[META19:![0-9]+]]}
// CHECK-FP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"}
// CHECK-FP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"}
//.