; llvm/llvm/test/CodeGen/X86/avx512fp16-arith.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512fp16 | FileCheck %s

; 512-bit packed-half add of two register operands.
; Expected assembly: a single zmm vaddph.
define <32 x half> @vaddph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
; CHECK-LABEL: vaddph_512_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %sum = fadd <32 x half> %i, %j
  ret <32 x half> %sum
}

; 512-bit packed-half add whose second operand is loaded from %j.
; Expected assembly: the load is folded into vaddph as a (%rdi) memory operand.
define <32 x half> @vaddph_512_fold_test(<32 x half> %i, ptr %j) nounwind {
; CHECK-LABEL: vaddph_512_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddph (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %mem = load <32 x half>, ptr %j, align 4
  %sum = fadd <32 x half> %i, %mem
  ret <32 x half> %sum
}

define <32 x half> @vaddph_512_broadc_test(<32 x half> %a) nounwind {
; CHECK-LABEL: vaddph_512_broadc_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = fadd <32 x half> %a, <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>
  ret <32 x half> %b
}

; 256-bit packed-half add with an all-1.0 constant vector.
; Expected assembly: the constant is a {1to16} embedded-broadcast memory operand.
define <16 x half> @vaddph_256_broadc_test(<16 x half> %a) nounwind {
; CHECK-LABEL: vaddph_256_broadc_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %sum = fadd <16 x half> %a, <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>
  ret <16 x half> %sum
}

; 128-bit packed-half add with an all-1.0 constant vector.
; Expected assembly: the constant is a {1to8} embedded-broadcast memory operand.
define <8 x half> @vaddph_128_broadc_test(<8 x half> %a) nounwind {
; CHECK-LABEL: vaddph_128_broadc_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %sum = fadd <8 x half> %a, <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>
  ret <8 x half> %sum
}

; Merge-masked 512-bit add where the mask arrives as a <32 x i1> argument:
; lanes with a false mask bit keep the value of %i.
; Expected assembly: the mask is moved into %k1 (vpsllw + vpmovb2m) and used
; as a merge mask on vaddph.
define <32 x half> @vaddph_512_mask_test1(<32 x half> %i, <32 x half> %j, <32 x i1> %mask) nounwind readnone {
; CHECK-LABEL: vaddph_512_mask_test1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %ymm2, %ymm2
; CHECK-NEXT:    vpmovb2m %ymm2, %k1
; CHECK-NEXT:    vaddph %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %sum = fadd <32 x half> %i, %j
  %merged = select <32 x i1> %mask, <32 x half> %sum, <32 x half> %i
  ret <32 x half> %merged
}

; Merge-masked 512-bit add where the mask is computed as (%mask1 one 0.0):
; lanes failing the compare keep the value of %i.
; Expected assembly: vcmpneq_oqph against a zeroed register produces %k1,
; which merge-masks vaddph.
define <32 x half> @vaddph_512_mask_test(<32 x half> %i, <32 x half> %j, <32 x half> %mask1) nounwind readnone {
; CHECK-LABEL: vaddph_512_mask_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpneq_oqph %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddph %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %nonzero = fcmp one <32 x half> %mask1, zeroinitializer
  %sum = fadd <32 x half> %i, %j
  %merged = select <32 x i1> %nonzero, <32 x half> %sum, <32 x half> %i
  ret <32 x half> %merged
}

; Zero-masked 512-bit add: lanes where (%mask1 one 0.0) is false become zero.
; Expected assembly: vcmpneq_oqph produces %k1, applied to vaddph with {z}.
define <32 x half> @vaddph_512_maskz_test(<32 x half> %i, <32 x half> %j, <32 x half> %mask1) nounwind readnone {
; CHECK-LABEL: vaddph_512_maskz_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpneq_oqph %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddph %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %nonzero = fcmp one <32 x half> %mask1, zeroinitializer
  %sum = fadd <32 x half> %i, %j
  %zeroed = select <32 x i1> %nonzero, <32 x half> %sum, <32 x half> zeroinitializer
  ret <32 x half> %zeroed
}

; Merge-masked 512-bit add whose second operand is loaded from %j.ptr:
; lanes where (%mask1 one 0.0) is false keep the value of %i.
; Expected assembly: the load is folded into the masked vaddph as (%rdi).
; Not marked readnone: the body reads memory through %j.ptr, so that
; attribute would contradict the function's own behaviour.
define <32 x half> @vaddph_512_mask_fold_test(<32 x half> %i, ptr %j.ptr, <32 x half> %mask1) nounwind {
; CHECK-LABEL: vaddph_512_mask_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpneq_oqph %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddph (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = fcmp one <32 x half> %mask1, zeroinitializer
  %j = load <32 x half>, ptr %j.ptr
  %x = fadd <32 x half> %i, %j
  %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> %i
  ret <32 x half> %r
}

; Zero-masked 512-bit add whose second operand is loaded from %j.ptr:
; lanes where (%mask1 one 0.0) is false become zero.
; Expected assembly: the load is folded into vaddph with {%k1} {z}.
; Not marked readnone: the body reads memory through %j.ptr, so that
; attribute would contradict the function's own behaviour.
define <32 x half> @vaddph_512_maskz_fold_test(<32 x half> %i, ptr %j.ptr, <32 x half> %mask1) nounwind {
; CHECK-LABEL: vaddph_512_maskz_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpneq_oqph %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddph (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = fcmp one <32 x half> %mask1, zeroinitializer
  %j = load <32 x half>, ptr %j.ptr
  %x = fadd <32 x half> %i, %j
  %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> zeroinitializer
  ret <32 x half> %r
}

; Same as the zero-masked fold test, but the loaded value is the FIRST fadd
; operand (%j + %i). Expected assembly still folds the load into vaddph,
; i.e. the operands are commuted.
; Not marked readnone: the body reads memory through %j.ptr, so that
; attribute would contradict the function's own behaviour.
define <32 x half> @vaddph_512_maskz_fold_test_2(<32 x half> %i, ptr %j.ptr, <32 x half> %mask1) nounwind {
; CHECK-LABEL: vaddph_512_maskz_fold_test_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpneq_oqph %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddph (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = fcmp one <32 x half> %mask1, zeroinitializer
  %j = load <32 x half>, ptr %j.ptr
  %x = fadd <32 x half> %j, %i
  %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> zeroinitializer
  ret <32 x half> %r
}

; 512-bit packed-half subtract (%i - %j).
; Expected assembly: a single zmm vsubph.
define <32 x half> @vsubph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
; CHECK-LABEL: vsubph_512_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vsubph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %diff = fsub <32 x half> %i, %j
  ret <32 x half> %diff
}

; 512-bit packed-half multiply.
; Expected assembly: a single zmm vmulph.
define <32 x half> @vmulph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
; CHECK-LABEL: vmulph_512_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmulph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %prod = fmul <32 x half> %i, %j
  ret <32 x half> %prod
}

; 512-bit packed-half divide (%i / %j) without fast-math flags.
; Expected assembly: a single zmm vdivph.
define <32 x half> @vdivph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
; CHECK-LABEL: vdivph_512_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vdivph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %quot = fdiv <32 x half> %i, %j
  ret <32 x half> %quot
}

; 512-bit packed-half divide carrying the `fast` flag.
; Expected assembly: vrcpph of the divisor followed by vmulph, instead of vdivph.
define <32 x half> @vdivph_512_test_fast(<32 x half> %i, <32 x half> %j) nounwind readnone {
; CHECK-LABEL: vdivph_512_test_fast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcpph %zmm1, %zmm1
; CHECK-NEXT:    vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %quot = fdiv fast <32 x half> %i, %j
  ret <32 x half> %quot
}

; Scalar half: (load %x.ptr) + (%i + %j).
; Expected assembly: two vaddsh, the second taking the loaded value directly
; as a (%rdi) memory operand.
; Not marked readnone: the body reads memory through %x.ptr, so that
; attribute would contradict the function's own behaviour.
define half @add_sh(half %i, half %j, ptr %x.ptr) nounwind {
; CHECK-LABEL: add_sh:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = load half, ptr %x.ptr
  %y = fadd half %i, %j
  %r = fadd half %x, %y
  ret half %r
}

; Scalar half: (load %x.ptr) - (%i - %j). The loaded value is the minuend,
; so the expected assembly loads it with vmovsh rather than folding it into
; the second vsubsh.
; Not marked readnone: the body reads memory through %x.ptr, so that
; attribute would contradict the function's own behaviour.
define half @sub_sh(half %i, half %j, ptr %x.ptr) nounwind {
; CHECK-LABEL: sub_sh:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovsh (%rdi), %xmm2
; CHECK-NEXT:    vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vsubsh %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %x = load half, ptr %x.ptr
  %y = fsub half %i, %j
  %r = fsub half %x, %y
  ret half %r
}

; Scalar half: (%i - %j) - (load %x.ptr). The loaded value is the subtrahend,
; and the expected assembly folds it into the second vsubsh as (%rdi).
; Not marked readnone: the body reads memory through %x.ptr, so that
; attribute would contradict the function's own behaviour.
define half @sub_sh_2(half %i, half %j, ptr %x.ptr) nounwind {
; CHECK-LABEL: sub_sh_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vsubsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = load half, ptr %x.ptr
  %y = fsub half %i, %j
  %r = fsub half %y, %x
  ret half %r
}

; Scalar half: (load %x.ptr) * (%i * %j).
; Expected assembly: two vmulsh, the second taking the loaded value directly
; as a (%rdi) memory operand.
; Not marked readnone: the body reads memory through %x.ptr, so that
; attribute would contradict the function's own behaviour.
define half @mul_sh(half %i, half %j, ptr %x.ptr) nounwind {
; CHECK-LABEL: mul_sh:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmulsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmulsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = load half, ptr %x.ptr
  %y = fmul half %i, %j
  %r = fmul half %x, %y
  ret half %r
}

; Scalar half: (load %x.ptr) / (%i / %j). The loaded value is the dividend,
; so the expected assembly loads it with vmovsh rather than folding it into
; the second vdivsh.
; Not marked readnone: the body reads memory through %x.ptr, so that
; attribute would contradict the function's own behaviour.
define half @div_sh(half %i, half %j, ptr %x.ptr) nounwind {
; CHECK-LABEL: div_sh:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovsh (%rdi), %xmm2
; CHECK-NEXT:    vdivsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vdivsh %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %x = load half, ptr %x.ptr
  %y = fdiv half %i, %j
  %r = fdiv half %x, %y
  ret half %r
}

; Scalar half: (%i / %j) / (load %x.ptr). The loaded value is the divisor,
; and the expected assembly folds it into the second vdivsh as (%rdi).
; Not marked readnone: the body reads memory through %x.ptr, so that
; attribute would contradict the function's own behaviour.
define half @div_sh_2(half %i, half %j, ptr %x.ptr) nounwind {
; CHECK-LABEL: div_sh_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vdivsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vdivsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = load half, ptr %x.ptr
  %y = fdiv half %i, %j
  %r = fdiv half %y, %x
  ret half %r
}

; Scalar half divide carrying the `fast` flag.
; Expected assembly: vrcpsh of the divisor followed by vmulsh, instead of vdivsh.
define half @div_sh_3(half %i, half %j) nounwind readnone {
; CHECK-LABEL: div_sh_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcpsh %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vmulsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %quot = fdiv fast half %i, %j
  ret half %quot
}

; Scalar half `une` compare returning i1.
; Expected assembly: vcmpneqsh into a mask register, then kmovd to %eax.
define i1 @cmp_une_sh(half %x, half %y) {
; CHECK-LABEL: cmp_une_sh:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmpneqsh %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
entry:
  %ne = fcmp une half %x, %y
  ret i1 %ne
}

; Scalar half `oeq` compare returning i1.
; Expected assembly: vcmpeqsh into a mask register, then kmovd to %eax.
define i1 @cmp_oeq_sh(half %x, half %y) {
; CHECK-LABEL: cmp_oeq_sh:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmpeqsh %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
entry:
  %eq = fcmp oeq half %x, %y
  ret i1 %eq
}

; Scalar half `olt` compare returning i1.
; Expected assembly: vucomish with the operands swapped, then seta.
define i1 @cmp_olt_sh(half %x, half %y) {
; CHECK-LABEL: cmp_olt_sh:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vucomish %xmm0, %xmm1
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    retq
entry:
  %lt = fcmp olt half %x, %y
  ret i1 %lt
}

; 512-bit packed-half `une` compare returning <32 x i1>.
; Expected assembly: vcmpneqph into %k0, expanded to a byte vector with vpmovm2b.
define <32 x i1> @cmp_ph(<32 x half> %x, <32 x half> %y) {
; CHECK-LABEL: cmp_ph:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmpneqph %zmm1, %zmm0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %ymm0
; CHECK-NEXT:    retq
entry:
  %ne = fcmp une <32 x half> %x, %y
  ret <32 x i1> %ne
}

; Scalar half negation via the fneg instruction.
; Expected assembly: vpxor with a broadcast -0.0 constant.
define half @fneg(half %x) {
; CHECK-LABEL: fneg:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %neg = fneg half %x
  ret half %neg
}

; Scalar half negation written as the `fsub -0.0, x` idiom.
; Expected assembly: vpxor with a broadcast -0.0 constant.
define half @fneg_idiom(half %x) {
; CHECK-LABEL: fneg_idiom:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %neg = fsub half -0.0, %x
  ret half %neg
}

; Scalar half absolute value via llvm.fabs.f16.
; Expected assembly: vpand with a broadcast constant (printed as NaN lanes in
; the asm comment; presumably the sign-clearing mask).
define half @fabs(half %x) {
; CHECK-LABEL: fabs:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %abs = call half @llvm.fabs.f16(half %x)
  ret half %abs
}
declare half @llvm.fabs.f16(half)

; Scalar half copysign(%x, %y) via llvm.copysign.f16.
; Expected assembly: one broadcast constant combined by a single vpternlogd $226.
define half @fcopysign(half %x, half %y) {
; CHECK-LABEL: fcopysign:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT:    vpternlogd $226, %xmm1, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %signed = call half @llvm.copysign.f16(half %x, half %y)
  ret half %signed
}
declare half @llvm.copysign.f16(half, half)

; Scalar half rounding via llvm.round.f16.
; Expected assembly: broadcast -0.0 and 4.9976E-1 constants, combine them with
; the input using vpternlogq $248, add with vaddsh, then vrndscalesh $11.
define half @fround(half %x) {
; CHECK-LABEL: fround:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
; CHECK-NEXT:    vpternlogq $248, %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vaddsh %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vrndscalesh $11, %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %rounded = call half @llvm.round.f16(half %x)
  ret half %rounded
}
declare half @llvm.round.f16(half)

; 128-bit packed-half negation via the fneg instruction.
; Expected assembly: vpxor with a broadcast -0.0 constant.
define <8 x half> @fnegv8f16(<8 x half> %x) {
; CHECK-LABEL: fnegv8f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %neg = fneg <8 x half> %x
  ret <8 x half> %neg
}

; 128-bit packed-half negation written as `fsub <-0.0 splat>, x`.
; Expected assembly: vpxor with a broadcast -0.0 constant.
define <8 x half> @fneg_idiomv8f16(<8 x half> %x) {
; CHECK-LABEL: fneg_idiomv8f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %neg = fsub <8 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %x
  ret <8 x half> %neg
}

; 128-bit packed-half absolute value via llvm.fabs.v8f16.
; Expected assembly: vpand with a broadcast constant (printed as NaN lanes in
; the asm comment; presumably the sign-clearing mask).
define <8 x half> @fabsv8f16(<8 x half> %x) {
; CHECK-LABEL: fabsv8f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %abs = call <8 x half> @llvm.fabs.v8f16(<8 x half> %x)
  ret <8 x half> %abs
}
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)

; 128-bit packed-half copysign(%x, %y) via llvm.copysign.v8f16.
; Expected assembly: a single vpternlogd $228 whose constant operand is a
; {1to4} embedded broadcast from the constant pool.
define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcopysignv8f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %signed = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y)
  ret <8 x half> %signed
}
declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)

; 128-bit packed-half rounding via llvm.round.v8f16.
; Expected assembly: broadcast -0.0 and 4.9976E-1 constants, combine them with
; the input using vpternlogq $248, add with vaddph, then vrndscaleph $11.
define <8 x half> @roundv8f16(<8 x half> %x) {
; CHECK-LABEL: roundv8f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
; CHECK-NEXT:    vpternlogq $248, %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vaddph %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vrndscaleph $11, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %rounded = call <8 x half> @llvm.round.v8f16(<8 x half> %x)
  ret <8 x half> %rounded
}
declare <8 x half> @llvm.round.v8f16(<8 x half>)

; 256-bit packed-half negation via the fneg instruction.
; Expected assembly: vpxor with a broadcast -0.0 constant.
define <16 x half> @fnegv16f16(<16 x half> %x) {
; CHECK-LABEL: fnegv16f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %neg = fneg <16 x half> %x
  ret <16 x half> %neg
}

; 256-bit packed-half negation written as `fsub <-0.0 splat>, x`.
; Expected assembly: vpxor with a broadcast -0.0 constant.
define <16 x half> @fneg_idiomv16f16(<16 x half> %x) {
; CHECK-LABEL: fneg_idiomv16f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %neg = fsub <16 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %x
  ret <16 x half> %neg
}

; 256-bit packed-half absolute value via llvm.fabs.v16f16.
; Expected assembly: vpand with a broadcast constant (printed as NaN lanes in
; the asm comment; presumably the sign-clearing mask).
define <16 x half> @fabsv16f16(<16 x half> %x) {
; CHECK-LABEL: fabsv16f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT:    vpand %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %abs = call <16 x half> @llvm.fabs.v16f16(<16 x half> %x)
  ret <16 x half> %abs
}
declare <16 x half> @llvm.fabs.v16f16(<16 x half>)

; 256-bit packed-half copysign(%x, %y) via llvm.copysign.v16f16.
; Expected assembly: a single vpternlogd $228 whose constant operand is a
; {1to8} embedded broadcast from the constant pool.
define <16 x half> @fcopysignv16f16(<16 x half> %x, <16 x half> %y) {
; CHECK-LABEL: fcopysignv16f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %signed = call <16 x half> @llvm.copysign.v16f16(<16 x half> %x, <16 x half> %y)
  ret <16 x half> %signed
}
declare <16 x half> @llvm.copysign.v16f16(<16 x half>, <16 x half>)

; 256-bit packed-half rounding via llvm.round.v16f16.
; Expected assembly: broadcast -0.0 and 4.9976E-1 constants, combine them with
; the input using vpternlogq $248, add with vaddph, then vrndscaleph $11.
define <16 x half> @roundv16f16(<16 x half> %x) {
; CHECK-LABEL: roundv16f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
; CHECK-NEXT:    vpternlogq $248, %ymm1, %ymm0, %ymm2
; CHECK-NEXT:    vaddph %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vrndscaleph $11, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %rounded = call <16 x half> @llvm.round.v16f16(<16 x half> %x)
  ret <16 x half> %rounded
}
declare <16 x half> @llvm.round.v16f16(<16 x half>)

; 512-bit packed-half negation via the fneg instruction.
; Expected assembly: vpxorq with a broadcast -0.0 constant.
define <32 x half> @fnegv32f16(<32 x half> %x) {
; CHECK-LABEL: fnegv32f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} zmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %neg = fneg <32 x half> %x
  ret <32 x half> %neg
}

define <32 x half> @fneg_idiomv32f16(<32 x half> %x) {
; CHECK-LABEL: fneg_idiomv32f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} zmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %a = fsub <32 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %x
  ret <32 x half> %a
}

; 512-bit packed-half absolute value via llvm.fabs.v32f16.
; Expected assembly: vpandq with a broadcast constant (printed as NaN lanes in
; the asm comment; presumably the sign-clearing mask).
define <32 x half> @fabsv32f16(<32 x half> %x) {
; CHECK-LABEL: fabsv32f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} zmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %abs = call <32 x half> @llvm.fabs.v32f16(<32 x half> %x)
  ret <32 x half> %abs
}
declare <32 x half> @llvm.fabs.v32f16(<32 x half>)

; 512-bit packed-half copysign(%x, %y) via llvm.copysign.v32f16.
; Expected assembly: a single vpternlogd $228 whose constant operand is a
; {1to16} embedded broadcast from the constant pool.
define <32 x half> @fcopysignv32f16(<32 x half> %x, <32 x half> %y) {
; CHECK-LABEL: fcopysignv32f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %signed = call <32 x half> @llvm.copysign.v32f16(<32 x half> %x, <32 x half> %y)
  ret <32 x half> %signed
}
declare <32 x half> @llvm.copysign.v32f16(<32 x half>, <32 x half>)

define <32 x half> @roundv32f16(<32 x half> %x) {
; CHECK-LABEL: roundv32f16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} zmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpbroadcastw {{.*#+}} zmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
; CHECK-NEXT:    vpternlogq $248, %zmm1, %zmm0, %zmm2
; CHECK-NEXT:    vaddph %zmm2, %zmm0, %zmm0
; CHECK-NEXT:    vrndscaleph $11, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %a = call <32 x half> @llvm.round.v32f16(<32 x half> %x)
  ret <32 x half> %a
}
declare <32 x half> @llvm.round.v32f16(<32 x half>)

; Interleaves lanes of (%x - %y) and (%x + %y): even lanes come from the
; difference, odd lanes from the sum.
; Expected assembly: separate vsubph and vaddph, merged with vpblendw.
define <8 x half>  @regression_test1(<8 x half> %x, <8 x half> %y) #0 {
; CHECK-LABEL: regression_test1:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3],xmm2[4],xmm0[5],xmm2[6],xmm0[7]
; CHECK-NEXT:    retq
entry:
  %diff = fsub <8 x half> %x, %y
  %sum = fadd <8 x half> %x, %y
  %blend = shufflevector <8 x half> %diff, <8 x half> %sum, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x half> %blend
}

; fptoui from <8 x float> to <8 x i16>.
; Expected assembly: vcvttps2udq to 32-bit lanes, then vpmovdw to narrow.
define <8 x i16>  @regression_test2(<8 x float> %x) #0 {
; CHECK-LABEL: regression_test2:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm0
; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %conv = fptoui <8 x float> %x to <8 x i16>
  ret <8 x i16> %conv
}

; fptosi from <8 x float> to <8 x i16>.
; Expected assembly: vcvttps2dq to 32-bit lanes, then vpmovdw to narrow.
define <8 x i16>  @regression_test3(<8 x float> %x) #0 {
; CHECK-LABEL: regression_test3:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %conv = fptosi <8 x float> %x to <8 x i16>
  ret <8 x i16> %conv
}

; fptoui from <8 x double> to <8 x i16>.
; Expected assembly: vcvttpd2udq to 32-bit lanes, then vpmovdw to narrow.
define <8 x i16>  @regression_test4(<8 x double> %x) #0 {
; CHECK-LABEL: regression_test4:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm0
; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %conv = fptoui <8 x double> %x to <8 x i16>
  ret <8 x i16> %conv
}

; fptosi from <8 x double> to <8 x i16>.
; Expected assembly: vcvttpd2dq to 32-bit lanes, then vpmovdw to narrow.
define <8 x i16>  @regression_test5(<8 x double> %x) #0 {
; CHECK-LABEL: regression_test5:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcvttpd2dq %zmm0, %ymm0
; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %conv = fptosi <8 x double> %x to <8 x i16>
  ret <8 x i16> %conv
}

; 128-bit packed-half `oeq` compare returning <8 x i1>.
; Expected assembly: vcmpeqph into %k0, expanded to word lanes with vpmovm2w.
define <8 x i1> @fcmp_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: fcmp_v8f16:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmpeqph %xmm1, %xmm0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
entry:
  %eq = fcmp oeq <8 x half> %a, %b
  ret <8 x i1> %eq
}

; 256-bit packed-half `oeq` compare returning <16 x i1>.
; Expected assembly: vcmpeqph into %k0, expanded to byte lanes with vpmovm2b.
define <16 x i1> @fcmp_v16f16(<16 x half> %a, <16 x half> %b) {
; CHECK-LABEL: fcmp_v16f16:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmpeqph %ymm1, %ymm0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %eq = fcmp oeq <16 x half> %a, %b
  ret <16 x i1> %eq
}

; 512-bit packed-half `oeq` compare returning <32 x i1>.
; Expected assembly: vcmpeqph into %k0, expanded to byte lanes with vpmovm2b.
define <32 x i1> @fcmp_v32f16(<32 x half> %a, <32 x half> %b) {
; CHECK-LABEL: fcmp_v32f16:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmpeqph %zmm1, %zmm0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %ymm0
; CHECK-NEXT:    retq
entry:
  %eq = fcmp oeq <32 x half> %a, %b
  ret <32 x i1> %eq
}

; 128-bit packed-half `oeq` compare, zero-extended to <8 x i16>.
; Expected assembly: vcmpeqph, vpmovm2w, then vpsrlw $15 to turn each
; all-ones lane into 1.
define <8 x i16> @zext_fcmp_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: zext_fcmp_v8f16:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmpeqph %xmm1, %xmm0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    vpsrlw $15, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %eq = fcmp oeq <8 x half> %a, %b
  %wide = zext <8 x i1> %eq to <8 x i16>
  ret <8 x i16> %wide
}

; 256-bit packed-half `oeq` compare, zero-extended to <16 x i16>.
; Expected assembly: vcmpeqph, vpmovm2w, then vpsrlw $15 to turn each
; all-ones lane into 1.
define <16 x i16> @zext_fcmp_v16f16(<16 x half> %a, <16 x half> %b) {
; CHECK-LABEL: zext_fcmp_v16f16:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmpeqph %ymm1, %ymm0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %ymm0
; CHECK-NEXT:    vpsrlw $15, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %eq = fcmp oeq <16 x half> %a, %b
  %wide = zext <16 x i1> %eq to <16 x i16>
  ret <16 x i16> %wide
}

; 512-bit packed-half `oeq` compare, zero-extended to <32 x i16>.
; Expected assembly: vcmpeqph, vpmovm2w, then vpsrlw $15 to turn each
; all-ones lane into 1.
define <32 x i16> @zext_fcmp_v32f16(<32 x half> %a, <32 x half> %b) {
; CHECK-LABEL: zext_fcmp_v32f16:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcmpeqph %zmm1, %zmm0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %zmm0
; CHECK-NEXT:    vpsrlw $15, %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %eq = fcmp oeq <32 x half> %a, %b
  %wide = zext <32 x i1> %eq to <32 x i16>
  ret <32 x i16> %wide
}