llvm/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-intrinsics.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512fp16 | FileCheck %s

; Intrinsic used by the vcvtw2ph tests below: takes a <32 x i16> source and an
; i32 operand and returns <32 x half>. The tests in this file pass 4 or 10 for
; the i32 operand; only the 10 case expects a {ru-sae} annotation in the
; generated assembly.
declare <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16>, i32)

; Merge-masked word -> half conversion through the rounding intrinsic.
; %mask is bitcast to <32 x i1> and the select keeps %arg1 in the lanes whose
; mask bit is clear. The expected assembly is a single masked vcvtw2ph into
; %zmm1 followed by a move of the result into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Same merge-masked pattern as the intrinsic-based test, but the conversion is
; written with the generic sitofp instruction. The expected assembly is the
; same masked vcvtw2ph into %zmm1 plus the move into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = sitofp <32 x i16> %arg0 to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Broadcast-source variant using the intrinsic: a single i16 is loaded from
; %arg0 and splatted to all 32 lanes (insertelement into lane 0, then a
; shufflevector with an all-zero shuffle mask). The load and splat are
; expected to fold into a {1to32} memory operand of the masked vcvtw2ph, which
; merges directly into %zmm0, so no trailing register move is expected.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, ptr %arg0
  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Broadcast-source variant using the generic sitofp instruction: the splat of
; the i16 loaded from %arg0 is expected to fold into a {1to32} memory operand
; of the masked vcvtw2ph, merging into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, ptr %arg0
  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
  %res0 = sitofp <32 x i16> %val to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Explicit-rounding variant: the intrinsic is called with i32 10 instead of 4,
; and the expected masked vcvtw2ph carries a {ru-sae} annotation. Masking and
; the final move into %zmm0 are the same as in the basic merge-masked test.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Unmasked intrinsic call with no select: the expected assembly is a plain
; vcvtw2ph from %zmm0 to %zmm0 with no mask register setup.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  ret <32 x half> %res
}

; Unmasked conversion written with the generic sitofp instruction: expected to
; select a plain vcvtw2ph from %zmm0 to %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = sitofp <32 x i16> %arg0 to <32 x half>
  ret <32 x half> %res
}

; Zero-masking variant using the intrinsic: the select falls back to
; zeroinitializer in lanes whose mask bit is clear, and the expected
; vcvtw2ph carries both {%k1} and {z}.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res
}

; Zero-masking variant using the generic sitofp instruction: a select against
; zeroinitializer is expected to become vcvtw2ph with {%k1} {z}.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = sitofp <32 x i16> %arg0 to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res
}

; Load-folding variant using the intrinsic: the full <32 x i16> vector load
; from %arg0 is expected to become the (%rdi) memory operand of the masked
; vcvtw2ph, which merges into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %val = load <32 x i16>, ptr %arg0
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Load-folding variant using the generic sitofp instruction: the vector load
; from %arg0 is expected to fold into the masked vcvtw2ph as a (%rdi) memory
; operand.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %val = load <32 x i16>, ptr %arg0
  %res0 = sitofp <32 x i16> %val to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Intrinsic used by the vcvtph2w tests below. As used in this file its operands
; are: the <32 x half> source, a <32 x i16> passthrough (or zeroinitializer),
; an i32 lane mask (or -1), and a final i32 operand that is 4 in most tests
; and 9 in the test expecting {rd-sae}.
declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half>, <32 x i16>, i32, i32)

; Merge-masked half -> word conversion: %arg1 and %mask are passed straight to
; the masked intrinsic. The expected assembly is a masked vcvtph2w into %zmm1
; followed by a move of the result into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2w %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Broadcast-source variant: a single half is loaded from %arg0 and splatted to
; all 32 lanes (insertelement into lane 0, then a shufflevector with an
; all-zero shuffle mask). The splat is expected to fold into a {1to32} memory
; operand of the masked vcvtph2w, merging into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2w (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Explicit-rounding variant: the last intrinsic operand is 9 instead of 4, and
; the expected masked vcvtph2w carries a {rd-sae} annotation.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2w {rd-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
  ret <32 x i16> %res
}

; All-ones mask variant: the intrinsic is called with a constant mask of -1.
; The expected assembly is an unmasked vcvtph2w from %zmm0 to %zmm0 that does
; not reference the passthrough %arg1 at all.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2w %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %res
}

; Zero-masking variant: the passthrough operand is zeroinitializer, and the
; expected vcvtph2w carries both {%k1} and {z}.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2w %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Load-folding variant: the full <32 x half> vector load from %arg0 is
; expected to become the (%rdi) memory operand of the masked vcvtph2w, which
; merges into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2w (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <32 x half>, ptr %arg0
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}


; Intrinsic used by the vcvtuw2ph tests below: takes a <32 x i16> source and an
; i32 operand and returns <32 x half>. The tests in this file pass 4 or 10 for
; the i32 operand; only the 10 case expects a {ru-sae} annotation in the
; generated assembly.
declare <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16>, i32)

; Merge-masked unsigned word -> half conversion through the rounding
; intrinsic. %mask is bitcast to <32 x i1> and the select keeps %arg1 in the
; lanes whose mask bit is clear. The expected assembly is a single masked
; vcvtuw2ph into %zmm1 followed by a move of the result into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Same merge-masked pattern as the intrinsic-based test, but the conversion is
; written with the generic uitofp instruction. The expected assembly is the
; same masked vcvtuw2ph into %zmm1 plus the move into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = uitofp <32 x i16> %arg0 to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Broadcast-source variant using the intrinsic: a single i16 is loaded from
; %arg0 and splatted to all 32 lanes (insertelement into lane 0, then a
; shufflevector with an all-zero shuffle mask). The load and splat are
; expected to fold into a {1to32} memory operand of the masked vcvtuw2ph,
; which merges directly into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, ptr %arg0
  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Broadcast-source variant using the generic uitofp instruction: the splat of
; the i16 loaded from %arg0 is expected to fold into a {1to32} memory operand
; of the masked vcvtuw2ph, merging into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, ptr %arg0
  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
  %res0 = uitofp <32 x i16> %val to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Explicit-rounding variant: the intrinsic is called with i32 10 instead of 4,
; and the expected masked vcvtuw2ph carries a {ru-sae} annotation. Masking and
; the final move into %zmm0 are the same as in the basic merge-masked test.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Unmasked intrinsic call with no select: the expected assembly is a plain
; vcvtuw2ph from %zmm0 to %zmm0 with no mask register setup.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  ret <32 x half> %res
}

; Unmasked conversion written with the generic uitofp instruction: expected to
; select a plain vcvtuw2ph from %zmm0 to %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = uitofp <32 x i16> %arg0 to <32 x half>
  ret <32 x half> %res
}

; Zero-masking variant using the intrinsic: the select falls back to
; zeroinitializer in lanes whose mask bit is clear, and the expected
; vcvtuw2ph carries both {%k1} and {z}.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res
}

; Zero-masking variant using the generic uitofp instruction: a select against
; zeroinitializer is expected to become vcvtuw2ph with {%k1} {z}.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = uitofp <32 x i16> %arg0 to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res
}

; Load-folding variant using the intrinsic: the full <32 x i16> vector load
; from %arg0 is expected to become the (%rdi) memory operand of the masked
; vcvtuw2ph, which merges into %zmm0.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtuw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %val = load <32 x i16>, ptr %arg0
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Load-folding variant using the generic uitofp instruction: the vector load
; from %arg0 is expected to fold into the masked vcvtuw2ph as a (%rdi) memory
; operand.
define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtuw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i32 %mask to <32 x i1>
  %val = load <32 x i16>, ptr %arg0
  %res0 = uitofp <32 x i16> %val to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

; Intrinsic used by the vcvtph2uw tests below. As used in this file its
; operands are: the <32 x half> source, a <32 x i16> passthrough (or
; zeroinitializer), an i32 lane mask (or -1), and a final i32 operand that is
; 4 in most tests and 9 in the test expecting {rd-sae}.
declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half>, <32 x i16>, i32, i32)

; Merge-masked half -> unsigned word conversion: %arg1 and %mask are passed
; straight to the masked intrinsic. The expected assembly is a masked
; vcvtph2uw into %zmm1 followed by a move of the result into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uw %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Broadcast-source variant: a single half is loaded from %arg0 and splatted to
; all 32 lanes (insertelement into lane 0, then a shufflevector with an
; all-zero shuffle mask). The splat is expected to fold into a {1to32} memory
; operand of the masked vcvtph2uw, merging into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2uw (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Explicit-rounding variant: the last intrinsic operand is 9 instead of 4, and
; the expected masked vcvtph2uw carries a {rd-sae} annotation.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uw {rd-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
  ret <32 x i16> %res
}

; All-ones mask variant: the intrinsic is called with a constant mask of -1.
; The expected assembly is an unmasked vcvtph2uw from %zmm0 to %zmm0 that does
; not reference the passthrough %arg1 at all.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2uw %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %res
}

; Zero-masking variant: the passthrough operand is zeroinitializer, and the
; expected vcvtph2uw carries both {%k1} and {z}.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uw %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Load-folding variant: the full <32 x half> vector load from %arg0 is
; expected to become the (%rdi) memory operand of the masked vcvtph2uw, which
; merges into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2uw (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <32 x half>, ptr %arg0
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Intrinsic used by the vcvttph2w tests below. As used in this file its
; operands are: the <32 x half> source, a <32 x i16> passthrough (or
; zeroinitializer), an i32 lane mask (or -1), and a final i32 operand that is
; 4 in most tests and 8 in the test expecting {sae}.
declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half>, <32 x i16>, i32, i32)

; Merge-masked vcvttph2w: %arg1 and %mask are passed straight to the masked
; intrinsic. The expected assembly is a masked vcvttph2w into %zmm1 followed
; by a move of the result into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2w %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Broadcast-source variant: a single half is loaded from %arg0 and splatted to
; all 32 lanes (insertelement into lane 0, then a shufflevector with an
; all-zero shuffle mask). The splat is expected to fold into a {1to32} memory
; operand of the masked vcvttph2w, merging into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2w (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; SAE variant: the last intrinsic operand is 8 instead of 4, and the expected
; masked vcvttph2w carries a {sae} annotation.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2w {sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
  ret <32 x i16> %res
}

; All-ones mask variant: the intrinsic is called with a constant mask of -1.
; The expected assembly is an unmasked vcvttph2w from %zmm0 to %zmm0 that does
; not reference the passthrough %arg1 at all.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %res
}

; Zero-masking variant: the passthrough operand is zeroinitializer, and the
; expected vcvttph2w carries both {%k1} and {z}.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Load-folding variant: the full <32 x half> vector load from %arg0 is
; expected to become the (%rdi) memory operand of the masked vcvttph2w, which
; merges into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2w (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <32 x half>, ptr %arg0
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Intrinsic used by the vcvttph2uw tests below. As used in this file its
; operands are: the <32 x half> source, a <32 x i16> passthrough (or
; zeroinitializer), an i32 lane mask (or -1), and a final i32 operand that is
; 4 in most tests and 8 in the test expecting {sae}.
declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half>, <32 x i16>, i32, i32)

; Merge-masked vcvttph2uw: %arg1 and %mask are passed straight to the masked
; intrinsic. The expected assembly is a masked vcvttph2uw into %zmm1 followed
; by a move of the result into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uw %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Broadcast-source variant: a single half is loaded from %arg0 and splatted to
; all 32 lanes (insertelement into lane 0, then a shufflevector with an
; all-zero shuffle mask). The splat is expected to fold into a {1to32} memory
; operand of the masked vcvttph2uw, merging into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2uw (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; SAE variant: the last intrinsic operand is 8 instead of 4, and the expected
; masked vcvttph2uw carries a {sae} annotation.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uw {sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
  ret <32 x i16> %res
}

; All-ones mask variant: the intrinsic is called with a constant mask of -1.
; The expected assembly is an unmasked vcvttph2uw from %zmm0 to %zmm0 that
; does not reference the passthrough %arg1 at all.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2uw %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %res
}

; Zero-masking variant: the passthrough operand is zeroinitializer, and the
; expected vcvttph2uw carries both {%k1} and {z}.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uw %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %res
}

; Load-folding variant: the full <32 x half> vector load from %arg0 is
; expected to become the (%rdi) memory operand of the masked vcvttph2uw, which
; merges into %zmm0.
define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2uw (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <32 x half>, ptr %arg0
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}