llvm/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s

define signext i16 @test_mm_cvtsi128_si16(<2 x i64> %A) local_unnamed_addr #0 {
; CHECK-LABEL: test_mm_cvtsi128_si16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmovw %xmm0, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %A to <8 x i16>
  %vecext.i = extractelement <8 x i16> %0, i32 0
  ret i16 %vecext.i
}

define <2 x i64> @test_mm_cvtsi16_si128(i16 signext %A) local_unnamed_addr #0 {
; CHECK-LABEL: test_mm_cvtsi16_si128:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmovw %edi, %xmm0
; CHECK-NEXT:    retq
entry:
  %vecinit7.i = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %A, i32 0
  %0 = bitcast <8 x i16> %vecinit7.i to <2 x i64>
  ret <2 x i64> %0
}

define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = sitofp <8 x i32> %x0 to <8 x half>
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256_z(<8 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = sitofp <8 x i32> %x0 to <8 x half>
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

define <8 x half> @sint_to_fp_8i32_to_8f16(<8 x i32> %x) {
; CHECK-LABEL: sint_to_fp_8i32_to_8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = sitofp <8 x i32> %x to <8 x half>
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_z(<4 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <4 x half> @sint_to_fp_4i32_to_4f16(<4 x i32> %x) {
; CHECK-LABEL: sint_to_fp_4i32_to_4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = sitofp <4 x i32> %x to <4 x half>
  ret <4 x half> %res
}

define <2 x half> @sint_to_fp_2i32_to_2f16(<2 x i32> %x) {
; CHECK-LABEL: sint_to_fp_2i32_to_2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = sitofp <2 x i32> %x to <2 x half>
  ret <2 x half> %res
}

define <4 x i32> @fp_to_sint_4f16_to_4i32(<4 x half> %x) {
; CHECK-LABEL: fp_to_sint_4f16_to_4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fptosi <4 x half> %x to <4 x i32>
  ret <4 x i32> %res
}

define <2 x i32> @fp_to_sint_2f16_to_2i32(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fptosi <2 x half> %x to <2 x i32>
  ret <2 x i32> %res
}

define <2 x i16> @fp_to_sint_2f16_to_2i16(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fptosi <2 x half> %x to <2 x i16>
  ret <2 x i16> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = uitofp <8 x i32> %x0 to <8 x half>
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256_z(<8 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = uitofp <8 x i32> %x0 to <8 x half>
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

define <8 x half> @uint_to_fp_8i32_to_8f16(<8 x i32> %x) {
; CHECK-LABEL: uint_to_fp_8i32_to_8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = uitofp <8 x i32> %x to <8 x half>
  ret <8 x half> %res
}

define <8 x i32> @fp_to_uint_8f16_to_8i32(<8 x half> %x) {
; CHECK-LABEL: fp_to_uint_8f16_to_8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = fptoui <8 x half> %x to <8 x i32>
  ret <8 x i32> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_z(<4 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <4 x half> @uint_to_fp_4i32_to_4f16(<4 x i32> %x) {
; CHECK-LABEL: uint_to_fp_4i32_to_4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = uitofp <4 x i32> %x to <4 x half>
  ret <4 x half> %res
}

define <2 x half> @uint_to_fp_2i32_to_2f16(<2 x i32> %x) {
; CHECK-LABEL: uint_to_fp_2i32_to_2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = uitofp <2 x i32> %x to <2 x half>
  ret <2 x half> %res
}

define <4 x i32> @fp_to_uint_4f16_to_4i32(<4 x half> %x) {
; CHECK-LABEL: fp_to_uint_4f16_to_4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fptoui <4 x half> %x to <4 x i32>
  ret <4 x i32> %res
}

define <2 x i32> @fp_to_uint_2f16_to_2i32(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fptoui <2 x half> %x to <2 x i32>
  ret <2 x i32> %res
}

define <2 x i16> @fp_to_uint_2f16_to_2i16(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fptoui <2 x half> %x to <2 x i16>
  ret <2 x i16> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2dq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2dq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phy (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <4 x double>, ptr %px0, align 32
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phx (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <2 x double>, ptr %px0, align 16
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_z(<4 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <4 x half> @sint_to_fp_4i64_to_4f16(<4 x i64> %x) {
; CHECK-LABEL: sint_to_fp_4i64_to_4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = sitofp <4 x i64> %x to <4 x half>
  ret <4 x half> %res
}

define <4 x i64> @fp_to_sint_4f16_to_4i64(<4 x half> %x) {
; CHECK-LABEL: fp_to_sint_4f16_to_4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2qq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = fptosi <4 x half> %x to <4 x i64>
  ret <4 x i64> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_z(<2 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <2 x half> @sint_to_fp_2i64_to_2f16(<2 x i64> %x) {
; CHECK-LABEL: sint_to_fp_2i64_to_2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = sitofp <2 x i64> %x to <2 x half>
  ret <2 x half> %res
}

define <2 x i64> @fp_to_sint_2f16_to_2i64(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2qq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fptosi <2 x half> %x to <2 x i64>
  ret <2 x i64> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_z(<4 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <4 x half> @uint_to_fp_4i64_to_4f16(<4 x i64> %x) {
; CHECK-LABEL: uint_to_fp_4i64_to_4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = uitofp <4 x i64> %x to <4 x half>
  ret <4 x half> %res
}

define <4 x i64> @fp_to_uint_4f16_to_4i64(<4 x half> %x) {
; CHECK-LABEL: fp_to_uint_4f16_to_4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2uqq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = fptoui <4 x half> %x to <4 x i64>
  ret <4 x i64> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_z(<2 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <2 x half> @uint_to_fp_2i64_to_2f16(<2 x i64> %x) {
; CHECK-LABEL: uint_to_fp_2i64_to_2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = uitofp <2 x i64> %x to <2 x half>
  ret <2 x half> %res
}

define <2 x i64> @fp_to_uint_2f16_to_2i64(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2uqq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fptoui <2 x half> %x to <2 x i64>
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_cvtt_ph2qq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2qq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2qq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2qq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_cvtt_ph2qq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2qq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2qq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2qq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_cvtt_ph2uqq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2uqq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uqq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uqq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_cvtt_ph2uqq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2uqq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uqq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uqq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}

declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
declare <16 x half> @llvm.sqrt.v16f16(<16 x half>)

define <8 x half> @test_sqrt_ph_128(<8 x half> %a0) {
; CHECK-LABEL: test_sqrt_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
  ret <8 x half> %1
}

define <8 x half> @test_sqrt_ph_128_fast(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_128_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %xmm0, %xmm0
; CHECK-NEXT:    vmulph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
  %2 = fdiv fast <8 x half> %a1, %1
  ret <8 x half> %2
}

define <8 x half> @test_sqrt_ph_128_fast2(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_128_fast2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
  ret <8 x half> %1
}

define <8 x half> @test_mask_sqrt_ph_128(<8 x half> %a0, <8 x half> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_sqrt_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x half> %1, <8 x half> %passthru
  ret <8 x half> %3
}

define <8 x half> @test_maskz_sqrt_ph_128(<8 x half> %a0, i8 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x half> %1, <8 x half> zeroinitializer
  ret <8 x half> %3
}

define <16 x half> @test_sqrt_ph_256(<16 x half> %a0) {
; CHECK-LABEL: test_sqrt_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtph %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
  ret <16 x half> %1
}

define <16 x half> @test_sqrt_ph_256_fast(<16 x half> %a0, <16 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_256_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %ymm0, %ymm0
; CHECK-NEXT:    vmulph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %1 = call fast <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
  %2 = fdiv fast <16 x half> %a1, %1
  ret <16 x half> %2
}

define <16 x half> @test_mask_sqrt_ph_256(<16 x half> %a0, <16 x half> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_sqrt_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x half> %1, <16 x half> %passthru
  ret <16 x half> %3
}

define <16 x half> @test_maskz_sqrt_ph_256(<16 x half> %a0, i16 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x half> %1, <16 x half> zeroinitializer
  ret <16 x half> %3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128(<8 x half>, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256(<16 x half>, <16 x half>, i16)

define <8 x half> @test_rsqrt_ph_128(<8 x half> %a0) {
; CHECK-LABEL: test_rsqrt_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128(<8 x half> %a0, <8 x half> zeroinitializer, i8 -1)
  ret <8 x half> %res
}

define <16 x half> @test_rsqrt_ph_256(<16 x half> %a0) {
; CHECK-LABEL: test_rsqrt_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256(<16 x half> %a0, <16 x half> zeroinitializer, i16 -1)
  ret <16 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128(<8 x half>, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256(<16 x half>, <16 x half>, i16)

define <8 x half> @test_rcp_ph_128(<8 x half> %a0, <8 x half> %a1, i8 %mask) {
; CHECK-LABEL: test_rcp_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrcpph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128(<8 x half> %a0, <8 x half> %a1, i8 %mask)
  ret <8 x half> %res
}

define <16 x half> @test_rcp_ph_256(<16 x half> %a0, <16 x half> %a1, i16 %mask) {
; CHECK-LABEL: test_rcp_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrcpph %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256(<16 x half> %a0, <16 x half> %a1, i16 %mask)
  ret <16 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half>, i32, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half>, i32, <16 x half>, i16)

define <8 x half>@test_int_x86_avx512_mask_reduce_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vreduceph $8, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vreduceph $4, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1)
  %res2 = fadd <8 x half> %res, %res1
  ret <8 x half> %res2
}

define <16 x half>@test_int_x86_avx512_mask_reduce_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vreduceph $8, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vreduceph $4, %ymm0, %ymm0
; CHECK-NEXT:    vaddph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3)
  %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1)
  %res2 = fadd <16 x half> %res, %res1
  ret <16 x half> %res2
}

declare <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half>, i32)
declare <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half>, i32)

define i8 @test_int_x86_avx512_fpclass_ph_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassph $2, %xmm0, %k1
; CHECK-NEXT:    vfpclassph $4, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %x0, i32 4)
  %res1 = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %x0, i32 2)
  %1 = and <8 x i1> %res1, %res
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

define i16 @test_int_x86_avx512_fpclass_ph_256(<16 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassph $2, %ymm0, %k1
; CHECK-NEXT:    vfpclassph $4, %ymm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half> %x0, i32 4)
  %res1 = call <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half> %x0, i32 2)
  %1 = and <16 x i1> %res1, %res
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half>, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half>, <16 x half>, i16)

define <8 x half>@test_int_x86_avx512_getexp_ph_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_getexp_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetexpph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> zeroinitializer, i8 -1)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_mask_getexp_ph_128(<8 x half> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetexpph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_maskz_getexp_ph_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_getexp_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetexpph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <16 x half>@test_int_x86_avx512_getexp_ph_256(<16 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_getexp_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetexpph %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> zeroinitializer, i16 -1)
  ret <16 x half> %res
}

define <16 x half>@test_int_x86_avx512_mask_getexp_ph_256(<16 x half> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetexpph %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> %x1, i16 %x2)
  ret <16 x half> %res
}

define <16 x half>@test_int_x86_avx512_maskz_getexp_ph_256(<16 x half> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_getexp_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetexpph %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> zeroinitializer, i16 %x2)
  ret <16 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half>, i32, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half>, i32, <16 x half>, i16)

define <8 x half>@test_int_x86_avx512_mask_getmant_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantph $8, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vgetmantph $4, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1)
  %res2 = fadd <8 x half> %res, %res1
  ret <8 x half> %res2
}

define <16 x half>@test_int_x86_avx512_mask_getmant_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantph $8, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vgetmantph $4, %ymm0, %ymm0
; CHECK-NEXT:    vaddph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3)
  %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1)
  %res2 = fadd <16 x half> %res, %res1
  ret <16 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half>, i32, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half>, i32, <16 x half>, i16)

define <8 x half>@test_int_x86_avx512_mask_rndscale_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrndscaleph $8, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vrndscaleph $4, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1)
  %res2 = fadd <8 x half> %res, %res1
  ret <8 x half> %res2
}

define <16 x half>@test_int_x86_avx512_mask_rndscale_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrndscaleph $8, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vrndscaleph $4, %ymm0, %ymm0
; CHECK-NEXT:    vaddph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3)
  %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1)
  %res2 = fadd <16 x half> %res, %res1
  ret <16 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half>, <8 x half>, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half>, <16 x half>, <16 x half>, i16)

define <8 x half>@test_int_x86_avx512_scalef_ph_128(<8 x half> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_scalef_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vscalefph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> zeroinitializer, i8 -1)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_mask_scalef_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vscalefph %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x3 to <8 x i1>
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_maskz_scalef_ph_128(<8 x half> %x0, <8 x half> %x1, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_scalef_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vscalefph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x3 to <8 x i1>
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> zeroinitializer, i8 %x3)
  ret <8 x half> %res
}

define <16 x half>@test_int_x86_avx512_scalef_ph_256(<16 x half> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_scalef_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vscalefph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> zeroinitializer, i16 -1)
  ret <16 x half> %res
}

define <16 x half>@test_int_x86_avx512_mask_scalef_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vscalefph %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x3 to <16 x i1>
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3)
  ret <16 x half> %res
}

define <16 x half>@test_int_x86_avx512_maskz_scalef_ph_256(<16 x half> %x0, <16 x half> %x1, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_scalef_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vscalefph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x3 to <16 x i1>
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> zeroinitializer, i16 %x3)
  ret <16 x half> %res
}