llvm/llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unkown-unkown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s

define <16 x half> @test_int_x86_avx512fp16_add_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fadd <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_add_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vaddph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vaddph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fadd <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fadd <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res   =  fadd <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_add_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fadd <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_add_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fadd <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_add_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vaddph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vaddph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fadd <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fadd <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res   =  fadd <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_add_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fadd <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_int_x86_avx512fp16_sub_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fsub <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_sub_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vsubph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vsubph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fsub <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fsub <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res   =  fsub <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_sub_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fsub <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_sub_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fsub <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_sub_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsubph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vsubph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fsub <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fsub <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res   =  fsub <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_sub_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fsub <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_int_x86_avx512fp16_mul_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fmul <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_mul_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vmulph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vmulph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fmul <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fmul <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res   =  fmul <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_mul_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fmul <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_mul_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fmul <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_mul_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vmulph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmulph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fmul <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fmul <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res   =  fmul <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_mul_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fmul <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_int_x86_avx512fp16_div_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fdiv <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_div_ph_256_fast(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpph %ymm1, %ymm1
; CHECK-NEXT:    vmulph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = fdiv fast <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_div_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vdivph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vdivph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fdiv <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fdiv <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res   =  fdiv <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_div_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fdiv <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_div_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fdiv <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_div_ph_128_fast(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpph %xmm1, %xmm1
; CHECK-NEXT:    vmulph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = fdiv fast <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_div_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vdivph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vdivph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fdiv <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fdiv <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res   =  fdiv <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_div_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fdiv <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_min_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_min_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = fcmp olt <16 x half> %x1, %x2
  %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
  ret  <16 x half> %res1
}

define <16 x half> @test_max_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_max_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = fcmp ogt <16 x half> %x1, %x2
  %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
  ret  <16 x half> %res1
}

define <8 x half> @test_min_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_min_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = fcmp olt <8 x half> %x1, %x2
  %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
  ret  <8 x half> %res1
}

define <8 x half> @test_max_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_max_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = fcmp ogt <8 x half> %x1, %x2
  %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
  ret  <8 x half> %res1
}

declare <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half>, <8 x half>)
declare <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half>, <16 x half>)

define <8 x half> @test_max_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_max_ph_128_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half> %x1, <8 x half> %x2)
  ret  <8 x half> %res0
}

define <16 x half> @test_max_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_max_ph_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half> %x1, <16 x half> %x2)
  ret  <16 x half> %res0
}

declare <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half>, <8 x half>)
declare <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half>, <16 x half>)

define <8 x half> @test_min_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_min_ph_128_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half> %x1, <8 x half> %x2)
  ret  <8 x half> %res0
}

define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_min_ph_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2)
  ret  <16 x half> %res0
}

declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phy (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <4 x double>, ptr %px0, align 32
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phx (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <2 x double>, ptr %px0, align 16
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2)
  ret <8 x float> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1)
  %res2 = fadd <8 x half> %res, %res1
  ret <8 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 x float> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}