; llvm/llvm/test/Transforms/Scalarizer/min-bits.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -scalarize-min-bits=16 -S | FileCheck %s --check-prefixes=CHECK,MIN16
; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -scalarize-min-bits=32 -S | FileCheck %s --check-prefixes=CHECK,MIN32
;
; Every function below is run through the scalarizer (followed by dce) twice:
; once with -scalarize-min-bits=16 and once with -scalarize-min-bits=32.
; Judging by the expected output, the 16-bit setting breaks these vectors down
; to single elements, while the 32-bit setting keeps pieces such as <2 x i16>
; or <2 x half> together and only peels off a narrower remainder.
; -scalarize-load-store appears to be what allows loads and stores to be split.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Load two <2 x i16> vectors, add them, and store the sum back through %pa.
; With -scalarize-min-bits=16 the expected output is fully per-lane: i16 loads,
; adds and stores, with lane 1 addressed via getelementptr and align 2.
; With -scalarize-min-bits=32 the <2 x i16> operations are expected unchanged.
define void @load_add_store_v2i16(ptr %pa, ptr %pb) {
; MIN16-LABEL: @load_add_store_v2i16(
; MIN16-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
; MIN16-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
; MIN16-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
; MIN16-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
; MIN16-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
; MIN16-NEXT:    [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
; MIN16-NEXT:    store i16 [[C_I0]], ptr [[PA]], align 8
; MIN16-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @load_add_store_v2i16(
; MIN32-NEXT:    [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 8
; MIN32-NEXT:    [[C:%.*]] = add <2 x i16> [[A]], [[B]]
; MIN32-NEXT:    store <2 x i16> [[C]], ptr [[PA]], align 8
; MIN32-NEXT:    ret void
;
  %a = load <2 x i16>, ptr %pa, align 8
  %b = load <2 x i16>, ptr %pb, align 8
  %c = add <2 x i16> %a, %b
  store <2 x i16> %c, ptr %pa, align 8
  ret void
}

; Same load/add/store pattern on <3 x i16>, an odd element count.
; With -scalarize-min-bits=16: three i16 lanes, accessed with align 8, 2 and 4.
; With -scalarize-min-bits=32: one <2 x i16> piece plus a trailing i16 that is
; addressed as index 1 of a <2 x i16> getelementptr and accessed with align 4.
define void @load_add_store_v3i16(ptr %pa, ptr %pb) {
; MIN16-LABEL: @load_add_store_v3i16(
; MIN16-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
; MIN16-NEXT:    [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2
; MIN16-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
; MIN16-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
; MIN16-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
; MIN16-NEXT:    [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
; MIN16-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
; MIN16-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
; MIN16-NEXT:    [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4
; MIN16-NEXT:    [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]]
; MIN16-NEXT:    store i16 [[C_I0]], ptr [[PA]], align 8
; MIN16-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 2
; MIN16-NEXT:    store i16 [[C_I2]], ptr [[PA_I2]], align 4
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @load_add_store_v3i16(
; MIN32-NEXT:    [[PB_I1:%.*]] = getelementptr <2 x i16>, ptr [[PB:%.*]], i32 1
; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
; MIN32-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 4
; MIN32-NEXT:    [[B_I0:%.*]] = load <2 x i16>, ptr [[PB]], align 8
; MIN32-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 4
; MIN32-NEXT:    [[C_I0:%.*]] = add <2 x i16> [[A_I0]], [[B_I0]]
; MIN32-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
; MIN32-NEXT:    store <2 x i16> [[C_I0]], ptr [[PA]], align 8
; MIN32-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 4
; MIN32-NEXT:    ret void
;
  %a = load <3 x i16>, ptr %pa, align 8
  %b = load <3 x i16>, ptr %pb, align 8
  %c = add <3 x i16> %a, %b
  store <3 x i16> %c, ptr %pa, align 8
  ret void
}

; Same load/add/store pattern on <4 x i16>.
; With -scalarize-min-bits=16: four i16 lanes, accessed with align 8, 2, 4, 2.
; With -scalarize-min-bits=32: two <2 x i16> halves; the second half is
; addressed as index 1 of a <2 x i16> getelementptr and accessed with align 4.
define void @load_add_store_v4i16(ptr %pa, ptr %pb) {
; MIN16-LABEL: @load_add_store_v4i16(
; MIN16-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
; MIN16-NEXT:    [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2
; MIN16-NEXT:    [[PB_I3:%.*]] = getelementptr i16, ptr [[PB]], i32 3
; MIN16-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
; MIN16-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
; MIN16-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
; MIN16-NEXT:    [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
; MIN16-NEXT:    [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3
; MIN16-NEXT:    [[A_I3:%.*]] = load i16, ptr [[PA_I3]], align 2
; MIN16-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
; MIN16-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
; MIN16-NEXT:    [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4
; MIN16-NEXT:    [[B_I3:%.*]] = load i16, ptr [[PB_I3]], align 2
; MIN16-NEXT:    [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]]
; MIN16-NEXT:    [[C_I3:%.*]] = add i16 [[A_I3]], [[B_I3]]
; MIN16-NEXT:    store i16 [[C_I0]], ptr [[PA]], align 8
; MIN16-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 2
; MIN16-NEXT:    store i16 [[C_I2]], ptr [[PA_I2]], align 4
; MIN16-NEXT:    store i16 [[C_I3]], ptr [[PA_I3]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @load_add_store_v4i16(
; MIN32-NEXT:    [[PB_I1:%.*]] = getelementptr <2 x i16>, ptr [[PB:%.*]], i32 1
; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
; MIN32-NEXT:    [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4
; MIN32-NEXT:    [[B_I0:%.*]] = load <2 x i16>, ptr [[PB]], align 8
; MIN32-NEXT:    [[B_I1:%.*]] = load <2 x i16>, ptr [[PB_I1]], align 4
; MIN32-NEXT:    [[C_I0:%.*]] = add <2 x i16> [[A_I0]], [[B_I0]]
; MIN32-NEXT:    [[C_I1:%.*]] = add <2 x i16> [[A_I1]], [[B_I1]]
; MIN32-NEXT:    store <2 x i16> [[C_I0]], ptr [[PA]], align 8
; MIN32-NEXT:    store <2 x i16> [[C_I1]], ptr [[PA_I1]], align 4
; MIN32-NEXT:    ret void
;
  %a = load <4 x i16>, ptr %pa, align 8
  %b = load <4 x i16>, ptr %pb, align 8
  %c = add <4 x i16> %a, %b
  store <4 x i16> %c, ptr %pa, align 8
  ret void
}

; Load/add/store on <4 x i10>. In both configurations the loads and the store
; are expected to stay whole <4 x i10> accesses; only the add is split.
; NOTE(review): presumably the memory accesses are kept whole because i10 is
; not a byte-sized element type -- confirm against the pass.
; With -scalarize-min-bits=16: four i10 adds, rebuilt with insertelement.
; With -scalarize-min-bits=32: a <3 x i10> add plus one i10 add, rebuilt with
; a widening shufflevector and a final insertelement.
define void @load_add_store_v4i10(ptr %pa, ptr %pb) {
; MIN16-LABEL: @load_add_store_v4i10(
; MIN16-NEXT:    [[A:%.*]] = load <4 x i10>, ptr [[PA:%.*]], align 8
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x i10> [[A]], i64 0
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x i10> [[A]], i64 1
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x i10> [[A]], i64 2
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x i10> [[A]], i64 3
; MIN16-NEXT:    [[B:%.*]] = load <4 x i10>, ptr [[PB:%.*]], align 8
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <4 x i10> [[B]], i64 0
; MIN16-NEXT:    [[C_I0:%.*]] = add i10 [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <4 x i10> [[B]], i64 1
; MIN16-NEXT:    [[C_I1:%.*]] = add i10 [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <4 x i10> [[B]], i64 2
; MIN16-NEXT:    [[C_I2:%.*]] = add i10 [[A_I2]], [[B_I2]]
; MIN16-NEXT:    [[B_I3:%.*]] = extractelement <4 x i10> [[B]], i64 3
; MIN16-NEXT:    [[C_I3:%.*]] = add i10 [[A_I3]], [[B_I3]]
; MIN16-NEXT:    [[C_UPTO0:%.*]] = insertelement <4 x i10> poison, i10 [[C_I0]], i64 0
; MIN16-NEXT:    [[C_UPTO1:%.*]] = insertelement <4 x i10> [[C_UPTO0]], i10 [[C_I1]], i64 1
; MIN16-NEXT:    [[C_UPTO2:%.*]] = insertelement <4 x i10> [[C_UPTO1]], i10 [[C_I2]], i64 2
; MIN16-NEXT:    [[C:%.*]] = insertelement <4 x i10> [[C_UPTO2]], i10 [[C_I3]], i64 3
; MIN16-NEXT:    store <4 x i10> [[C]], ptr [[PA]], align 8
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @load_add_store_v4i10(
; MIN32-NEXT:    [[A:%.*]] = load <4 x i10>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x i10> [[A]], <4 x i10> poison, <3 x i32> <i32 0, i32 1, i32 2>
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <4 x i10> [[A]], i64 3
; MIN32-NEXT:    [[B:%.*]] = load <4 x i10>, ptr [[PB:%.*]], align 8
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <4 x i10> [[B]], <4 x i10> poison, <3 x i32> <i32 0, i32 1, i32 2>
; MIN32-NEXT:    [[C_I0:%.*]] = add <3 x i10> [[A_I0]], [[B_I0]]
; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <4 x i10> [[B]], i64 3
; MIN32-NEXT:    [[C_I1:%.*]] = add i10 [[A_I1]], [[B_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i10> [[C_I0]], <3 x i10> [[C_I0]], <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
; MIN32-NEXT:    [[C:%.*]] = insertelement <4 x i10> [[TMP1]], i10 [[C_I1]], i64 3
; MIN32-NEXT:    store <4 x i10> [[C]], ptr [[PA]], align 8
; MIN32-NEXT:    ret void
;
  %a = load <4 x i10>, ptr %pa, align 8
  %b = load <4 x i10>, ptr %pb, align 8
  %c = add <4 x i10> %a, %b
  store <4 x i10> %c, ptr %pa, align 8
  ret void
}

; Select between two <2 x half> vectors using a single scalar i1 condition.
; With -scalarize-min-bits=16: one half select per lane, all sharing %cc.
; With -scalarize-min-bits=32: the <2 x half> select is expected unchanged.
define <2 x half> @select_uniform_condition_v2f16(<2 x half> %a, <2 x half> %b, i1 %cc) {
; MIN16-LABEL: @select_uniform_condition_v2f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    ret <2 x half> [[R]]
;
; MIN32-LABEL: @select_uniform_condition_v2f16(
; MIN32-NEXT:    [[R:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A:%.*]], <2 x half> [[B:%.*]]
; MIN32-NEXT:    ret <2 x half> [[R]]
;
  %r = select i1 %cc, <2 x half> %a, <2 x half> %b
  ret <2 x half> %r
}

; Scalar-condition select on <3 x half>, an odd element count.
; With -scalarize-min-bits=16: three half selects, rebuilt with insertelement.
; With -scalarize-min-bits=32: a <2 x half> select for lanes 0-1 plus a half
; select for lane 2, rebuilt with a widening shufflevector and insertelement.
define <3 x half> @select_uniform_condition_v3f16(<3 x half> %a, <3 x half> %b, i1 %cc) {
; MIN16-LABEL: @select_uniform_condition_v3f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    ret <3 x half> [[R]]
;
; MIN32-LABEL: @select_uniform_condition_v3f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A_I0]], <2 x half> [[B_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
; MIN32-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT:    ret <3 x half> [[R]]
;
  %r = select i1 %cc, <3 x half> %a, <3 x half> %b
  ret <3 x half> %r
}

; Scalar-condition select on <4 x half>.
; With -scalarize-min-bits=16: four half selects, rebuilt with insertelement.
; With -scalarize-min-bits=32: two <2 x half> selects (lanes 0-1 and 2-3),
; each widened to <4 x half> and then merged with a final shufflevector.
define <4 x half> @select_uniform_condition_v4f16(<4 x half> %a, <4 x half> %b, i1 %cc) {
; MIN16-LABEL: @select_uniform_condition_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]]
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = select i1 [[CC]], half [[A_I3]], half [[B_I3]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x half> [[R]]
;
; MIN32-LABEL: @select_uniform_condition_v4f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A_I0]], <2 x half> [[B_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], <2 x half> [[A_I1]], <2 x half> [[B_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    ret <4 x half> [[R]]
;
  %r = select i1 %cc, <4 x half> %a, <4 x half> %b
  ret <4 x half> %r
}

; Select on <4 x half> with a per-lane <4 x i1> condition. Both opt
; invocations share the same expectation: the select is left unchanged.
define <4 x half> @select_vector_condition_v4f16(<4 x half> %a, <4 x half> %b, <4 x i1> %cc) {
; CHECK-LABEL: @select_vector_condition_v4f16(
; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[CC:%.*]], <4 x half> [[A:%.*]], <4 x half> [[B:%.*]]
; CHECK-NEXT:    ret <4 x half> [[R]]
;
  %r = select <4 x i1> %cc, <4 x half> %a, <4 x half> %b
  ret <4 x half> %r
}

; Unary operation (fneg) on <2 x half>.
; With -scalarize-min-bits=16: one half fneg per lane, rebuilt with
; insertelement. With -scalarize-min-bits=32: expected unchanged.
define <2 x half> @unary_v2f16(<2 x half> %a) {
; MIN16-LABEL: @unary_v2f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fneg half [[A_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    ret <2 x half> [[R]]
;
; MIN32-LABEL: @unary_v2f16(
; MIN32-NEXT:    [[R:%.*]] = fneg <2 x half> [[A:%.*]]
; MIN32-NEXT:    ret <2 x half> [[R]]
;
  %r = fneg <2 x half> %a
  ret <2 x half> %r
}

; Unary operation (fneg) on <3 x half>, an odd element count.
; With -scalarize-min-bits=16: three half fnegs.
; With -scalarize-min-bits=32: a <2 x half> fneg for lanes 0-1 plus a half
; fneg for lane 2, rebuilt with a widening shufflevector and insertelement.
define <3 x half> @unary_v3f16(<3 x half> %a) {
; MIN16-LABEL: @unary_v3f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fneg half [[A_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fneg half [[A_I2]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    ret <3 x half> [[R]]
;
; MIN32-LABEL: @unary_v3f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fneg <2 x half> [[A_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN32-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT:    ret <3 x half> [[R]]
;
  %r = fneg <3 x half> %a
  ret <3 x half> %r
}

; Unary operation (fneg) on <4 x half>.
; With -scalarize-min-bits=16: four half fnegs, rebuilt with insertelement.
; With -scalarize-min-bits=32: two <2 x half> fnegs (lanes 0-1 and 2-3), each
; widened to <4 x half> and then merged with a final shufflevector.
define <4 x half> @unary_v4f16(<4 x half> %a) {
; MIN16-LABEL: @unary_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fneg half [[A_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fneg half [[A_I2]]
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = fneg half [[A_I3]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x half> [[R]]
;
; MIN32-LABEL: @unary_v4f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fneg <2 x half> [[A_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[R_I1:%.*]] = fneg <2 x half> [[A_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    ret <4 x half> [[R]]
;
  %r = fneg <4 x half> %a
  ret <4 x half> %r
}

; Binary operation (fadd) on <2 x half>.
; With -scalarize-min-bits=16: one half fadd per lane, rebuilt with
; insertelement. With -scalarize-min-bits=32: expected unchanged.
define <2 x half> @binary_v2f16(<2 x half> %a, <2 x half> %b) {
; MIN16-LABEL: @binary_v2f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    ret <2 x half> [[R]]
;
; MIN32-LABEL: @binary_v2f16(
; MIN32-NEXT:    [[R:%.*]] = fadd <2 x half> [[A:%.*]], [[B:%.*]]
; MIN32-NEXT:    ret <2 x half> [[R]]
;
  %r = fadd <2 x half> %a, %b
  ret <2 x half> %r
}

; Binary operation (fadd) on <3 x half>, an odd element count.
; With -scalarize-min-bits=16: three half fadds.
; With -scalarize-min-bits=32: a <2 x half> fadd for lanes 0-1 plus a half
; fadd for lane 2, rebuilt with a widening shufflevector and insertelement.
define <3 x half> @binary_v3f16(<3 x half> %a, <3 x half> %b) {
; MIN16-LABEL: @binary_v3f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fadd half [[A_I2]], [[B_I2]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    ret <3 x half> [[R]]
;
; MIN32-LABEL: @binary_v3f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fadd <2 x half> [[A_I0]], [[B_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
; MIN32-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT:    ret <3 x half> [[R]]
;
  %r = fadd <3 x half> %a, %b
  ret <3 x half> %r
}

; Binary operation (fadd) on <4 x half>.
; With -scalarize-min-bits=16: four half fadds, rebuilt with insertelement.
; With -scalarize-min-bits=32: two <2 x half> fadds (lanes 0-1 and 2-3), each
; widened to <4 x half> and then merged with a final shufflevector.
define <4 x half> @binary_v4f16(<4 x half> %a, <4 x half> %b) {
; MIN16-LABEL: @binary_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fadd half [[A_I2]], [[B_I2]]
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = fadd half [[A_I3]], [[B_I3]]
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x half> [[R]]
;
; MIN32-LABEL: @binary_v4f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fadd <2 x half> [[A_I0]], [[B_I0]]
; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[R_I1:%.*]] = fadd <2 x half> [[A_I1]], [[B_I1]]
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    ret <4 x half> [[R]]
;
  %r = fadd <4 x half> %a, %b
  ret <4 x half> %r
}

; Same-width cast (fptosi half -> i16) on a 2-element vector.
; With -scalarize-min-bits=16: one scalar fptosi per lane, rebuilt with
; insertelement. With -scalarize-min-bits=32: expected unchanged.
define <2 x i16> @fptosi_v2f16(<2 x half> %a) {
; MIN16-LABEL: @fptosi_v2f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[R_I0]], i64 0
; MIN16-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
; MIN16-NEXT:    ret <2 x i16> [[R]]
;
; MIN32-LABEL: @fptosi_v2f16(
; MIN32-NEXT:    [[R:%.*]] = fptosi <2 x half> [[A:%.*]] to <2 x i16>
; MIN32-NEXT:    ret <2 x i16> [[R]]
;
  %r = fptosi <2 x half> %a to <2 x i16>
  ret <2 x i16> %r
}

; Same-width cast (fptosi half -> i16) on a 3-element vector.
; With -scalarize-min-bits=16: three scalar fptosi casts.
; With -scalarize-min-bits=32: a <2 x half> -> <2 x i16> cast for lanes 0-1
; plus a scalar cast for lane 2, rebuilt with shufflevector and insertelement.
define <3 x i16> @fptosi_v3f16(<3 x half> %a) {
; MIN16-LABEL: @fptosi_v3f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fptosi half [[A_I2]] to i16
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
; MIN16-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2
; MIN16-NEXT:    ret <3 x i16> [[R]]
;
; MIN32-LABEL: @fptosi_v3f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fptosi <2 x half> [[A_I0]] to <2 x i16>
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN32-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[R_I1]], i64 2
; MIN32-NEXT:    ret <3 x i16> [[R]]
;
  %r = fptosi <3 x half> %a to <3 x i16>
  ret <3 x i16> %r
}

; Same-width cast (fptosi half -> i16) on a 4-element vector.
; With -scalarize-min-bits=16: four scalar fptosi casts.
; With -scalarize-min-bits=32: two <2 x half> -> <2 x i16> casts (lanes 0-1
; and 2-3), each widened to <4 x i16> and merged with a final shufflevector.
define <4 x i16> @fptosi_v4f16(<4 x half> %a) {
; MIN16-LABEL: @fptosi_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fptosi half [[A_I2]] to i16
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = fptosi half [[A_I3]] to i16
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x i16> [[R]]
;
; MIN32-LABEL: @fptosi_v4f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = fptosi <2 x half> [[A_I0]] to <2 x i16>
; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[R_I1:%.*]] = fptosi <2 x half> [[A_I1]] to <2 x i16>
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[R_I1]], <2 x i16> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    ret <4 x i16> [[R]]
;
  %r = fptosi <4 x half> %a to <4 x i16>
  ret <4 x i16> %r
}

; Widening cast (fpext half -> float) on a 4-element vector, so source and
; result element widths differ.
; With -scalarize-min-bits=16: four scalar fpext casts, rebuilt with
; insertelement. With -scalarize-min-bits=32: the vector cast is expected
; unchanged.
define <4 x float> @fpext_v4f16(<4 x half> %a) {
; MIN16-LABEL: @fpext_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = fpext half [[A_I0]] to float
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = fpext half [[A_I1]] to float
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = fpext half [[A_I2]] to float
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = fpext half [[A_I3]] to float
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x float> poison, float [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x float> [[R_UPTO0]], float [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x float> [[R_UPTO1]], float [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x float> [[R_UPTO2]], float [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x float> [[R]]
;
; MIN32-LABEL: @fpext_v4f16(
; MIN32-NEXT:    [[R:%.*]] = fpext <4 x half> [[A:%.*]] to <4 x float>
; MIN32-NEXT:    ret <4 x float> [[R]]
;
  %r = fpext <4 x half> %a to <4 x float>
  ret <4 x float> %r
}

; Integer compare producing <4 x i1>. Both opt invocations share the same
; expectation: the vector icmp is left unchanged.
; NOTE(review): despite the "f16" in the function name, the operands are
; <4 x i16> integers, not half.
define <4 x i1> @icmp_v4f16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: @icmp_v4f16(
; CHECK-NEXT:    [[R:%.*]] = icmp ugt <4 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %r = icmp ugt <4 x i16> %a, %b
  ret <4 x i1> %r
}

; getelementptr with a scalar base pointer and a <4 x i16> index vector.
; With -scalarize-min-bits=16: four scalar GEPs off the shared base, one per
; extracted index, rebuilt into <4 x ptr> with insertelement.
; With -scalarize-min-bits=32: the vector GEP is expected unchanged.
define <4 x ptr> @gep1_v4(ptr %base, <4 x i16> %a) {
; MIN16-LABEL: @gep1_v4(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
; MIN16-NEXT:    [[P_I0:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i16 [[A_I0]]
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I1]]
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
; MIN16-NEXT:    [[P_I2:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I2]]
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3
; MIN16-NEXT:    [[P_I3:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I3]]
; MIN16-NEXT:    [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
; MIN16-NEXT:    [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
; MIN16-NEXT:    [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
; MIN16-NEXT:    [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
; MIN16-NEXT:    ret <4 x ptr> [[P]]
;
; MIN32-LABEL: @gep1_v4(
; MIN32-NEXT:    [[P:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i16> [[A:%.*]]
; MIN32-NEXT:    ret <4 x ptr> [[P]]
;
  %p = getelementptr i32, ptr %base, <4 x i16> %a
  ret <4 x ptr> %p
}

; getelementptr with a <4 x ptr> base vector and a scalar i16 index.
; Both opt invocations share the same expectation: four scalar GEPs, one per
; extracted base pointer with the shared index, rebuilt with insertelement.
define <4 x ptr> @gep2_v4(<4 x ptr> %base, i16 %a) {
; CHECK-LABEL: @gep2_v4(
; CHECK-NEXT:    [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0
; CHECK-NEXT:    [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[A:%.*]]
; CHECK-NEXT:    [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1
; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[A]]
; CHECK-NEXT:    [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2
; CHECK-NEXT:    [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[A]]
; CHECK-NEXT:    [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3
; CHECK-NEXT:    [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[A]]
; CHECK-NEXT:    [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
; CHECK-NEXT:    [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
; CHECK-NEXT:    [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
; CHECK-NEXT:    [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
; CHECK-NEXT:    ret <4 x ptr> [[P]]
;
  %p = getelementptr i32, <4 x ptr> %base, i16 %a
  ret <4 x ptr> %p
}

; GEP with both a vector-of-pointers base and a <4 x i16> vector index.
; Under the MIN16 prefix (-scalarize-min-bits=16) the GEP is expected to be
; scalarized lane by lane: each lane extracts its base pointer and its index,
; forms a scalar GEP, and the result is rebuilt with an insertelement chain.
; Under the MIN32 prefix (-scalarize-min-bits=32) the vector GEP is expected
; to be left untouched.
define <4 x ptr> @gep3_v4(<4 x ptr> %base, <4 x i16> %a) {
; MIN16-LABEL: @gep3_v4(
; MIN16-NEXT:    [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
; MIN16-NEXT:    [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[A_I0]]
; MIN16-NEXT:    [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[A_I1]]
; MIN16-NEXT:    [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
; MIN16-NEXT:    [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[A_I2]]
; MIN16-NEXT:    [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3
; MIN16-NEXT:    [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[A_I3]]
; MIN16-NEXT:    [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
; MIN16-NEXT:    [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
; MIN16-NEXT:    [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
; MIN16-NEXT:    [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
; MIN16-NEXT:    ret <4 x ptr> [[P]]
;
; MIN32-LABEL: @gep3_v4(
; MIN32-NEXT:    [[P:%.*]] = getelementptr i32, <4 x ptr> [[BASE:%.*]], <4 x i16> [[A:%.*]]
; MIN32-NEXT:    ret <4 x ptr> [[P]]
;
  %p = getelementptr i32, <4 x ptr> %base, <4 x i16> %a
  ret <4 x ptr> %p
}

; Inserts %b into lane 1 of a <2 x i16> argument and stores the result through
; %p (the store carries no explicit alignment; the expected output uses
; align 4 for the first piece).
; Under MIN16 the store is expected to become two i16 stores: lane 0 extracted
; from %a and %b written directly as lane 1, with no insertelement left.
; Under MIN32 the insertelement and the <2 x i16> store are expected to stay
; as they are.
define void @insertelement_v2i16(ptr %p, <2 x i16> %a, i16 %b) {
; MIN16-LABEL: @insertelement_v2i16(
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x i16> [[A:%.*]], i64 0
; MIN16-NEXT:    store i16 [[A_I0]], ptr [[P]], align 4
; MIN16-NEXT:    store i16 [[B:%.*]], ptr [[P_I1]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @insertelement_v2i16(
; MIN32-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[A:%.*]], i16 [[B:%.*]], i64 1
; MIN32-NEXT:    store <2 x i16> [[R]], ptr [[P:%.*]], align 4
; MIN32-NEXT:    ret void
;
  %r = insertelement <2 x i16> %a, i16 %b, i64 1
  store <2 x i16> %r, ptr %p
  ret void
}

; Inserts %b into lane 2 (the last lane) of a <3 x i16> argument and stores
; the result through %p.
; Under MIN16 the expected output is three i16 stores: lanes 0 and 1 extracted
; from %a, and %b stored directly as lane 2.
; Under MIN32 the expected output splits the value into a <2 x i16> piece
; (lanes 0-1 taken from %a with a shufflevector) plus a scalar i16 remainder,
; for which %b is stored directly.
define void @insertelement_v3i16(ptr %p, <3 x i16> %a, i16 %b) {
; MIN16-LABEL: @insertelement_v3i16(
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
; MIN16-NEXT:    [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x i16> [[A:%.*]], i64 0
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x i16> [[A]], i64 1
; MIN16-NEXT:    store i16 [[A_I0]], ptr [[P]], align 8
; MIN16-NEXT:    store i16 [[A_I1]], ptr [[P_I1]], align 2
; MIN16-NEXT:    store i16 [[B:%.*]], ptr [[P_I2]], align 4
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @insertelement_v3i16(
; MIN32-NEXT:    [[P_I1:%.*]] = getelementptr <2 x i16>, ptr [[P:%.*]], i32 1
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x i16> [[A:%.*]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    store <2 x i16> [[A_I0]], ptr [[P]], align 8
; MIN32-NEXT:    store i16 [[B:%.*]], ptr [[P_I1]], align 4
; MIN32-NEXT:    ret void
;
  %r = insertelement <3 x i16> %a, i16 %b, i64 2
  store <3 x i16> %r, ptr %p
  ret void
}

; Inserts %b into lane 3 (the last lane) of a <4 x i16> argument and stores
; the result through %p.
; Under MIN16 the expected output is four i16 stores: lanes 0-2 extracted from
; %a, and %b stored directly as lane 3.
; Under MIN32 the expected output works on two <2 x i16> halves taken from %a
; with shufflevectors; %b is inserted at index 1 of the upper half, and each
; half is stored separately.
define void @insertelement_v4i16(ptr %p, <4 x i16> %a, i16 %b) {
; MIN16-LABEL: @insertelement_v4i16(
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
; MIN16-NEXT:    [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2
; MIN16-NEXT:    [[P_I3:%.*]] = getelementptr i16, ptr [[P]], i32 3
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
; MIN16-NEXT:    store i16 [[A_I0]], ptr [[P]], align 8
; MIN16-NEXT:    store i16 [[A_I1]], ptr [[P_I1]], align 2
; MIN16-NEXT:    store i16 [[A_I2]], ptr [[P_I2]], align 4
; MIN16-NEXT:    store i16 [[B:%.*]], ptr [[P_I3]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @insertelement_v4i16(
; MIN32-NEXT:    [[P_I1:%.*]] = getelementptr <2 x i16>, ptr [[P:%.*]], i32 1
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x i16> [[A:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[TMP1:%.*]] = insertelement <2 x i16> [[A_I1]], i16 [[B:%.*]], i64 1
; MIN32-NEXT:    store <2 x i16> [[A_I0]], ptr [[P]], align 8
; MIN32-NEXT:    store <2 x i16> [[TMP1]], ptr [[P_I1]], align 4
; MIN32-NEXT:    ret void
;
  %r = insertelement <4 x i16> %a, i16 %b, i64 3
  store <4 x i16> %r, ptr %p
  ret void
}

; Loads a <2 x i16> from %pa, overwrites lane 1 with %b, and returns the
; vector.
; Under MIN16 the expected output contains a single i16 load (lane 0 only;
; no load for the overwritten lane 1 appears) and rebuilds the return value
; with an insertelement chain.
; Under MIN32 the vector load and the insertelement are expected to stay
; unchanged.
define <2 x i16> @load_insertelement_v2i16(ptr %pa, i16 %b) {
; MIN16-LABEL: @load_insertelement_v2i16(
; MIN16-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 4
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[A_I0]], i64 0
; MIN16-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[B:%.*]], i64 1
; MIN16-NEXT:    ret <2 x i16> [[R]]
;
; MIN32-LABEL: @load_insertelement_v2i16(
; MIN32-NEXT:    [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4
; MIN32-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[A]], i16 [[B:%.*]], i64 1
; MIN32-NEXT:    ret <2 x i16> [[R]]
;
  %a = load <2 x i16>, ptr %pa
  %r = insertelement <2 x i16> %a, i16 %b, i64 1
  ret <2 x i16> %r
}

; Loads a <3 x i16> from %pa, overwrites lane 2 with %b, and returns the
; vector.
; Under MIN16 the expected output loads only lanes 0 and 1 as i16 values and
; rebuilds the result with an insertelement chain ending in %b.
; Under MIN32 the expected output loads a single <2 x i16> piece (lanes 0-1),
; widens it to <3 x i16> with a shufflevector whose last mask element is
; poison, and then inserts %b at lane 2; no load of the remainder lane appears.
define <3 x i16> @load_insertelement_v3i16(ptr %pa, i16 %b) {
; MIN16-LABEL: @load_insertelement_v3i16(
; MIN16-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
; MIN16-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[A_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1
; MIN16-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[B:%.*]], i64 2
; MIN16-NEXT:    ret <3 x i16> [[R]]
;
; MIN32-LABEL: @load_insertelement_v3i16(
; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[B:%.*]], i64 2
; MIN32-NEXT:    ret <3 x i16> [[R]]
;
  %a = load <3 x i16>, ptr %pa
  %r = insertelement <3 x i16> %a, i16 %b, i64 2
  ret <3 x i16> %r
}

; Loads a <4 x i16> from %pa, overwrites lane 3 with %b, and returns the
; vector.
; Under MIN16 the expected output loads only lanes 0-2 as i16 values and
; rebuilds the result with an insertelement chain ending in %b.
; Under MIN32 the expected output loads two <2 x i16> halves, inserts %b at
; index 1 of the upper half, and concatenates the halves back into a
; <4 x i16> using three shufflevectors (two widening, one merging).
define <4 x i16> @load_insertelement_v4i16(ptr %pa, i16 %b) {
; MIN16-LABEL: @load_insertelement_v4i16(
; MIN16-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
; MIN16-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
; MIN16-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
; MIN16-NEXT:    [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[A_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[A_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[B:%.*]], i64 3
; MIN16-NEXT:    ret <4 x i16> [[R]]
;
; MIN32-LABEL: @load_insertelement_v4i16(
; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
; MIN32-NEXT:    [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4
; MIN32-NEXT:    [[TMP1:%.*]] = insertelement <2 x i16> [[A_I1]], i16 [[B:%.*]], i64 1
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    ret <4 x i16> [[R]]
;
  %a = load <4 x i16>, ptr %pa
  %r = insertelement <4 x i16> %a, i16 %b, i64 3
  ret <4 x i16> %r
}

; Concatenates two <2 x i16> loads (from %pa and %pb) into a <4 x i16> with a
; shufflevector and stores the wider vector back through %pa.
; Under MIN16 the expected output has no shufflevector left: four i16 loads
; feed four i16 stores directly.
; Under MIN32 the expected output keeps both <2 x i16> loads and the
; concatenating shufflevector whole, and splits only the <4 x i16> store into
; two <2 x i16> stores whose operands are re-extracted from the shuffle result.
define void @shufflevector_grow(ptr %pa, ptr %pb) {
; MIN16-LABEL: @shufflevector_grow(
; MIN16-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 2
; MIN16-NEXT:    [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3
; MIN16-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
; MIN16-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA]], align 4
; MIN16-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
; MIN16-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 4
; MIN16-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
; MIN16-NEXT:    store i16 [[A_I0]], ptr [[PA]], align 8
; MIN16-NEXT:    store i16 [[A_I1]], ptr [[PA_I1]], align 2
; MIN16-NEXT:    store i16 [[B_I0]], ptr [[PA_I2]], align 4
; MIN16-NEXT:    store i16 [[B_I1]], ptr [[PA_I3]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @shufflevector_grow(
; MIN32-NEXT:    [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA:%.*]], i32 1
; MIN32-NEXT:    [[A:%.*]] = load <2 x i16>, ptr [[PA]], align 4
; MIN32-NEXT:    [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 4
; MIN32-NEXT:    [[R:%.*]] = shufflevector <2 x i16> [[A]], <2 x i16> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; MIN32-NEXT:    [[R_I0:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    store <2 x i16> [[R_I0]], ptr [[PA]], align 8
; MIN32-NEXT:    [[R_I1:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    store <2 x i16> [[R_I1]], ptr [[PA_I1]], align 4
; MIN32-NEXT:    ret void
;
  %a = load <2 x i16>, ptr %pa
  %b = load <2 x i16>, ptr %pb
  %r = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  store <4 x i16> %r, ptr %pa
  ret void
}

; Loads a <4 x i16> from %pa, selects lanes 1 and 2 with a shufflevector, and
; stores the resulting <2 x i16> back through %pa. The selected lanes straddle
; the two <2 x i16> halves of the source vector.
; Under MIN16 the expected output loads only lanes 1 and 2 as i16 values and
; stores them to element offsets 0 and 1.
; Under MIN32 the expected output splits the load into two <2 x i16> pieces,
; reassembles them into a <4 x i16> with shufflevectors, and keeps the
; original narrowing shufflevector and the <2 x i16> store whole.
define void @shufflevector_shrink(ptr %pa) {
; MIN16-LABEL: @shufflevector_shrink(
; MIN16-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
; MIN16-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
; MIN16-NEXT:    [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
; MIN16-NEXT:    store i16 [[A_I1]], ptr [[PA]], align 4
; MIN16-NEXT:    store i16 [[A_I2]], ptr [[PA_I1]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @shufflevector_shrink(
; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
; MIN32-NEXT:    [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
; MIN32-NEXT:    [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I1]], <2 x i16> [[A_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[A:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
; MIN32-NEXT:    store <2 x i16> [[R]], ptr [[PA]], align 4
; MIN32-NEXT:    ret void
;
  %a = load <4 x i16>, ptr %pa
  %r = shufflevector <4 x i16> %a, <4 x i16> poison, <2 x i32> <i32 1, i32 2>
  store <2 x i16> %r, ptr %pa
  ret void
}

; Loop that accumulates <2 x half> values: a vector phi starts at
; zeroinitializer, each iteration loads a <2 x half> (align 2) from
; %base[%idx] and fadds it into the accumulator, and the final sum is stored
; through %base after the loop exits.
; Under MIN16 the phi, load, fadd and final store are all expected to be split
; into two per-lane half operations.
; Under MIN32 the function is expected to be left in vector form.
define void @phi_v2f16(ptr %base, i64 %bound) {
; MIN16-LABEL: @phi_v2f16(
; MIN16-NEXT:  entry:
; MIN16-NEXT:    [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1
; MIN16-NEXT:    br label [[LOOP:%.*]]
; MIN16:       loop:
; MIN16-NEXT:    [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE]], i64 [[IDX]]
; MIN16-NEXT:    [[A_I0:%.*]] = load half, ptr [[P]], align 2
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
; MIN16-NEXT:    [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]]
; MIN16-NEXT:    [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
; MIN16-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
; MIN16-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
; MIN16-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
; MIN16:       end:
; MIN16-NEXT:    store half [[X_NEXT_I0]], ptr [[BASE]], align 4
; MIN16-NEXT:    store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @phi_v2f16(
; MIN32-NEXT:  entry:
; MIN32-NEXT:    br label [[LOOP:%.*]]
; MIN32:       loop:
; MIN32-NEXT:    [[X:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ]
; MIN32-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
; MIN32-NEXT:    [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i64 [[IDX]]
; MIN32-NEXT:    [[A:%.*]] = load <2 x half>, ptr [[P]], align 2
; MIN32-NEXT:    [[X_NEXT]] = fadd <2 x half> [[X]], [[A]]
; MIN32-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
; MIN32-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
; MIN32-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
; MIN32:       end:
; MIN32-NEXT:    store <2 x half> [[X_NEXT]], ptr [[BASE]], align 4
; MIN32-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %x = phi <2 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ]
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %p = getelementptr <2 x half>, ptr %base, i64 %idx
  %a = load <2 x half>, ptr %p, align 2
  %x.next = fadd <2 x half> %x, %a
  %idx.next = add i64 %idx, 1
  %cc = icmp ult i64 %idx.next, %bound
  br i1 %cc, label %loop, label %end

end:
  store <2 x half> %x.next, ptr %base
  ret void
}

; Same accumulation loop as the two-element variant, but on <3 x half>: a
; vector phi starts at zeroinitializer, each iteration loads a <3 x half>
; (align 2) and fadds it in, and the final sum is stored through %base.
; Under MIN16 the phi, load, fadd and final store are expected to be split
; into three per-lane half operations.
; Under MIN32 the expected output splits each of them into a <2 x half> piece
; plus a scalar half remainder for the last lane.
define void @phi_v3f16(ptr %base, i64 %bound) {
; MIN16-LABEL: @phi_v3f16(
; MIN16-NEXT:  entry:
; MIN16-NEXT:    [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1
; MIN16-NEXT:    [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2
; MIN16-NEXT:    br label [[LOOP:%.*]]
; MIN16:       loop:
; MIN16-NEXT:    [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE]], i64 [[IDX]]
; MIN16-NEXT:    [[A_I0:%.*]] = load half, ptr [[P]], align 2
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
; MIN16-NEXT:    [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2
; MIN16-NEXT:    [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2
; MIN16-NEXT:    [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]]
; MIN16-NEXT:    [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
; MIN16-NEXT:    [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]]
; MIN16-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
; MIN16-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
; MIN16-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
; MIN16:       end:
; MIN16-NEXT:    store half [[X_NEXT_I0]], ptr [[BASE]], align 8
; MIN16-NEXT:    store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2
; MIN16-NEXT:    store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @phi_v3f16(
; MIN32-NEXT:  entry:
; MIN32-NEXT:    [[BASE_I1:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i32 1
; MIN32-NEXT:    br label [[LOOP:%.*]]
; MIN32:       loop:
; MIN32-NEXT:    [[X_I0:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
; MIN32-NEXT:    [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
; MIN32-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
; MIN32-NEXT:    [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE]], i64 [[IDX]]
; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x half>, ptr [[P]], align 2
; MIN32-NEXT:    [[P_I1:%.*]] = getelementptr <2 x half>, ptr [[P]], i32 1
; MIN32-NEXT:    [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
; MIN32-NEXT:    [[X_NEXT_I0]] = fadd <2 x half> [[X_I0]], [[A_I0]]
; MIN32-NEXT:    [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
; MIN32-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
; MIN32-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
; MIN32-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
; MIN32:       end:
; MIN32-NEXT:    store <2 x half> [[X_NEXT_I0]], ptr [[BASE]], align 8
; MIN32-NEXT:    store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 4
; MIN32-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %x = phi <3 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ]
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %p = getelementptr <3 x half>, ptr %base, i64 %idx
  %a = load <3 x half>, ptr %p, align 2
  %x.next = fadd <3 x half> %x, %a
  %idx.next = add i64 %idx, 1
  %cc = icmp ult i64 %idx.next, %bound
  br i1 %cc, label %loop, label %end

end:
  store <3 x half> %x.next, ptr %base
  ret void
}

; Accumulation loop on <4 x half>: a vector phi starts at zeroinitializer,
; each iteration loads a <4 x half> (align 2) from %base[%idx] and fadds it
; into the accumulator, and the final sum is stored through %base after the
; loop exits.
; Under MIN16 the phi, load, fadd and final store are expected to be split
; into four per-lane half operations.
; Under MIN32 the expected output splits each of them into two <2 x half>
; pieces.
define void @phi_v4f16(ptr %base, i64 %bound) {
; MIN16-LABEL: @phi_v4f16(
; MIN16-NEXT:  entry:
; MIN16-NEXT:    [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1
; MIN16-NEXT:    [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2
; MIN16-NEXT:    [[BASE_I3:%.*]] = getelementptr half, ptr [[BASE]], i32 3
; MIN16-NEXT:    br label [[LOOP:%.*]]
; MIN16:       loop:
; MIN16-NEXT:    [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[X_I3:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I3:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
; MIN16-NEXT:    [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE]], i64 [[IDX]]
; MIN16-NEXT:    [[A_I0:%.*]] = load half, ptr [[P]], align 2
; MIN16-NEXT:    [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1
; MIN16-NEXT:    [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
; MIN16-NEXT:    [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2
; MIN16-NEXT:    [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2
; MIN16-NEXT:    [[P_I3:%.*]] = getelementptr half, ptr [[P]], i32 3
; MIN16-NEXT:    [[A_I3:%.*]] = load half, ptr [[P_I3]], align 2
; MIN16-NEXT:    [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]]
; MIN16-NEXT:    [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
; MIN16-NEXT:    [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]]
; MIN16-NEXT:    [[X_NEXT_I3]] = fadd half [[X_I3]], [[A_I3]]
; MIN16-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
; MIN16-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
; MIN16-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
; MIN16:       end:
; MIN16-NEXT:    store half [[X_NEXT_I0]], ptr [[BASE]], align 8
; MIN16-NEXT:    store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2
; MIN16-NEXT:    store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4
; MIN16-NEXT:    store half [[X_NEXT_I3]], ptr [[BASE_I3]], align 2
; MIN16-NEXT:    ret void
;
; MIN32-LABEL: @phi_v4f16(
; MIN32-NEXT:  entry:
; MIN32-NEXT:    [[BASE_I1:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i32 1
; MIN32-NEXT:    br label [[LOOP:%.*]]
; MIN32:       loop:
; MIN32-NEXT:    [[X_I0:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
; MIN32-NEXT:    [[X_I1:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
; MIN32-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
; MIN32-NEXT:    [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE]], i64 [[IDX]]
; MIN32-NEXT:    [[A_I0:%.*]] = load <2 x half>, ptr [[P]], align 2
; MIN32-NEXT:    [[P_I1:%.*]] = getelementptr <2 x half>, ptr [[P]], i32 1
; MIN32-NEXT:    [[A_I1:%.*]] = load <2 x half>, ptr [[P_I1]], align 2
; MIN32-NEXT:    [[X_NEXT_I0]] = fadd <2 x half> [[X_I0]], [[A_I0]]
; MIN32-NEXT:    [[X_NEXT_I1]] = fadd <2 x half> [[X_I1]], [[A_I1]]
; MIN32-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
; MIN32-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
; MIN32-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
; MIN32:       end:
; MIN32-NEXT:    store <2 x half> [[X_NEXT_I0]], ptr [[BASE]], align 8
; MIN32-NEXT:    store <2 x half> [[X_NEXT_I1]], ptr [[BASE_I1]], align 4
; MIN32-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %x = phi <4 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ]
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %p = getelementptr <4 x half>, ptr %base, i64 %idx
  %a = load <4 x half>, ptr %p, align 2
  %x.next = fadd <4 x half> %x, %a
  %idx.next = add i64 %idx, 1
  %cc = icmp ult i64 %idx.next, %bound
  br i1 %cc, label %loop, label %end

end:
  store <4 x half> %x.next, ptr %base
  ret void
}

; Calls the llvm.minnum intrinsic on two <2 x half> arguments.
; Under MIN16 the call is expected to be split into two llvm.minnum.f16 calls
; on extracted lanes, with the result rebuilt by an insertelement chain.
; Under MIN32 the <2 x half> intrinsic call is expected to stay unchanged.
define <2 x half> @call_v2f16(<2 x half> %a, <2 x half> %b) {
; MIN16-LABEL: @call_v2f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]])
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    ret <2 x half> [[R]]
;
; MIN32-LABEL: @call_v2f16(
; MIN32-NEXT:    [[R:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A:%.*]], <2 x half> [[B:%.*]])
; MIN32-NEXT:    ret <2 x half> [[R]]
;
  %r = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
  ret <2 x half> %r
}

; Calls the llvm.minnum intrinsic on two <3 x half> arguments.
; Under MIN16 the call is expected to be split into three llvm.minnum.f16
; calls on extracted lanes, with the result rebuilt by an insertelement chain.
; Under MIN32 the expected output uses one llvm.minnum.v2f16 call for lanes
; 0-1 (operands taken with shufflevectors) and one llvm.minnum.f16 call for
; lane 2; the <2 x half> partial result is widened with a shufflevector and
; the scalar result inserted at lane 2.
define <3 x half> @call_v3f16(<3 x half> %a, <3 x half> %b) {
; MIN16-LABEL: @call_v3f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]])
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]])
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    ret <3 x half> [[R]]
;
; MIN32-LABEL: @call_v3f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I0]], <2 x half> [[B_I0]])
; MIN32-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
; MIN32-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
; MIN32-NEXT:    [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT:    ret <3 x half> [[R]]
;
  %r = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a, <3 x half> %b)
  ret <3 x half> %r
}

; Calls the llvm.minnum intrinsic on two <4 x half> arguments.
; Under MIN16 the call is expected to be split into four llvm.minnum.f16 calls
; on extracted lanes, with the result rebuilt by an insertelement chain.
; Under MIN32 the expected output uses two llvm.minnum.v2f16 calls, one per
; <2 x half> half of the operands (taken with shufflevectors), and
; concatenates the two partial results back into a <4 x half> with three
; shufflevectors (two widening, one merging).
define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) {
; MIN16-LABEL: @call_v4f16(
; MIN16-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; MIN16-NEXT:    [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
; MIN16-NEXT:    [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]])
; MIN16-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
; MIN16-NEXT:    [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
; MIN16-NEXT:    [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
; MIN16-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
; MIN16-NEXT:    [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
; MIN16-NEXT:    [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]])
; MIN16-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
; MIN16-NEXT:    [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
; MIN16-NEXT:    [[R_I3:%.*]] = call half @llvm.minnum.f16(half [[A_I3]], half [[B_I3]])
; MIN16-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
; MIN16-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
; MIN16-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
; MIN16-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
; MIN16-NEXT:    ret <4 x half> [[R]]
;
; MIN32-LABEL: @call_v4f16(
; MIN32-NEXT:    [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; MIN32-NEXT:    [[R_I0:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I0]], <2 x half> [[B_I0]])
; MIN32-NEXT:    [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT:    [[R_I1:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I1]], <2 x half> [[B_I1]])
; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT:    ret <4 x half> [[R]]
;
  %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %r
}

; Declarations of the vector llvm.minnum intrinsics called by the
; call_v2f16, call_v3f16 and call_v4f16 tests in this file.
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>)
declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)