; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
define signext i16 @test_mm_cvtsi128_si16(<2 x i64> %A) local_unnamed_addr #0 {
; CHECK-LABEL: test_mm_cvtsi128_si16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovw %xmm0, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
entry:
%0 = bitcast <2 x i64> %A to <8 x i16>
%vecext.i = extractelement <8 x i16> %0, i32 0
ret i16 %vecext.i
}
define <2 x i64> @test_mm_cvtsi16_si128(i16 signext %A) local_unnamed_addr #0 {
; CHECK-LABEL: test_mm_cvtsi16_si128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovw %edi, %xmm0
; CHECK-NEXT: retq
entry:
%vecinit7.i = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %A, i32 0
%0 = bitcast <8 x i16> %vecinit7.i to <2 x i64>
ret <2 x i64> %0
}
define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = sitofp <8 x i32> %x0 to <8 x half>
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256_z(<8 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = sitofp <8 x i32> %x0 to <8 x half>
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
ret <8 x half> %res
}
define <8 x half> @sint_to_fp_8i32_to_8f16(<8 x i32> %x) {
; CHECK-LABEL: sint_to_fp_8i32_to_8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = sitofp <8 x i32> %x to <8 x half>
ret <8 x half> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32>, <8 x half>, i8)
define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_z(<4 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}
define <4 x half> @sint_to_fp_4i32_to_4f16(<4 x i32> %x) {
; CHECK-LABEL: sint_to_fp_4i32_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = sitofp <4 x i32> %x to <4 x half>
ret <4 x half> %res
}
define <2 x half> @sint_to_fp_2i32_to_2f16(<2 x i32> %x) {
; CHECK-LABEL: sint_to_fp_2i32_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = sitofp <2 x i32> %x to <2 x half>
ret <2 x half> %res
}
define <4 x i32> @fp_to_sint_4f16_to_4i32(<4 x half> %x) {
; CHECK-LABEL: fp_to_sint_4f16_to_4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptosi <4 x half> %x to <4 x i32>
ret <4 x i32> %res
}
define <2 x i32> @fp_to_sint_2f16_to_2i32(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptosi <2 x half> %x to <2 x i32>
ret <2 x i32> %res
}
define <2 x i16> @fp_to_sint_2f16_to_2i16(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptosi <2 x half> %x to <2 x i16>
ret <2 x i16> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = uitofp <8 x i32> %x0 to <8 x half>
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256_z(<8 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = uitofp <8 x i32> %x0 to <8 x half>
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
ret <8 x half> %res
}
define <8 x half> @uint_to_fp_8i32_to_8f16(<8 x i32> %x) {
; CHECK-LABEL: uint_to_fp_8i32_to_8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = uitofp <8 x i32> %x to <8 x half>
ret <8 x half> %res
}
define <8 x i32> @fp_to_uint_8f16_to_8i32(<8 x half> %x) {
; CHECK-LABEL: fp_to_uint_8f16_to_8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = fptoui <8 x half> %x to <8 x i32>
ret <8 x i32> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32>, <8 x half>, i8)
define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_z(<4 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}
define <4 x half> @uint_to_fp_4i32_to_4f16(<4 x i32> %x) {
; CHECK-LABEL: uint_to_fp_4i32_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = uitofp <4 x i32> %x to <4 x half>
ret <4 x half> %res
}
define <2 x half> @uint_to_fp_2i32_to_2f16(<2 x i32> %x) {
; CHECK-LABEL: uint_to_fp_2i32_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = uitofp <2 x i32> %x to <2 x half>
ret <2 x half> %res
}
define <4 x i32> @fp_to_uint_4f16_to_4i32(<4 x half> %x) {
; CHECK-LABEL: fp_to_uint_4f16_to_4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptoui <4 x half> %x to <4 x i32>
ret <4 x i32> %res
}
define <2 x i32> @fp_to_uint_2f16_to_2i32(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptoui <2 x half> %x to <2 x i32>
ret <2 x i32> %res
}
define <2 x i16> @fp_to_uint_2f16_to_2i16(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptoui <2 x half> %x to <2 x i16>
ret <2 x i16> %res
}
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half>, <4 x i32>, i8)
define <4 x i32> @test_int_x86_avx512_cvt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
ret <4 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half>, <8 x i32>, i8)
define <8 x i32> @test_int_x86_avx512_cvt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
ret <8 x i32> %res
}
define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
ret <8 x i32> %res
}
define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
ret <8 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)
define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
ret <4 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)
define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
ret <8 x i32> %res
}
define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
ret <8 x i32> %res
}
define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
ret <8 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)
define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
ret <4 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)
define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
ret <8 x i32> %res
}
define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
ret <8 x i32> %res
}
define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
ret <8 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)
define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
ret <4 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)
define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
ret <8 x i32> %res
}
define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
ret <8 x i32> %res
}
define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
ret <8 x i32> %res
}
declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)
define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
ret <4 x double> %res
}
define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
ret <4 x double> %res
}
declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)
define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
ret <2 x double> %res
}
define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
ret <2 x double> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)
define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtpd2phy (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%x0 = load <4 x double>, ptr %px0, align 32
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)
define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtpd2phx (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%x0 = load <2 x double>, ptr %px0, align 16
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64>, <8 x half>, i8)
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_z(<4 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}
define <4 x half> @sint_to_fp_4i64_to_4f16(<4 x i64> %x) {
; CHECK-LABEL: sint_to_fp_4i64_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = sitofp <4 x i64> %x to <4 x half>
ret <4 x half> %res
}
define <4 x i64> @fp_to_sint_4f16_to_4i64(<4 x half> %x) {
; CHECK-LABEL: fp_to_sint_4f16_to_4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = fptosi <4 x half> %x to <4 x i64>
ret <4 x i64> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64>, <8 x half>, i8)
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_z(<2 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}
define <2 x half> @sint_to_fp_2i64_to_2f16(<2 x i64> %x) {
; CHECK-LABEL: sint_to_fp_2i64_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = sitofp <2 x i64> %x to <2 x half>
ret <2 x half> %res
}
define <2 x i64> @fp_to_sint_2f16_to_2i64(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptosi <2 x half> %x to <2 x i64>
ret <2 x i64> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64>, <8 x half>, i8)
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_z(<4 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}
define <4 x half> @uint_to_fp_4i64_to_4f16(<4 x i64> %x) {
; CHECK-LABEL: uint_to_fp_4i64_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = uitofp <4 x i64> %x to <4 x half>
ret <4 x half> %res
}
define <4 x i64> @fp_to_uint_4f16_to_4i64(<4 x half> %x) {
; CHECK-LABEL: fp_to_uint_4f16_to_4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = fptoui <4 x half> %x to <4 x i64>
ret <4 x i64> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64>, <8 x half>, i8)
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_z(<2 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}
define <2 x half> @uint_to_fp_2i64_to_2f16(<2 x i64> %x) {
; CHECK-LABEL: uint_to_fp_2i64_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = uitofp <2 x i64> %x to <2 x half>
ret <2 x half> %res
}
define <2 x i64> @fp_to_uint_2f16_to_2i64(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = fptoui <2 x half> %x to <2 x i64>
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half>, <2 x i64>, i8)
define <2 x i64> @test_int_x86_avx512_cvtt_ph2qq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
ret <2 x i64> %res
}
define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2qq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
ret <2 x i64> %res
}
define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
ret <2 x i64> %res
}
declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half>, <4 x i64>, i8)
define <4 x i64> @test_int_x86_avx512_cvtt_ph2qq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
ret <4 x i64> %res
}
define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2qq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
ret <4 x i64> %res
}
define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
ret <4 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half>, <2 x i64>, i8)
define <2 x i64> @test_int_x86_avx512_cvtt_ph2uqq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
ret <2 x i64> %res
}
define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
ret <2 x i64> %res
}
define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
ret <2 x i64> %res
}
declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half>, <4 x i64>, i8)
define <4 x i64> @test_int_x86_avx512_cvtt_ph2uqq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0
; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
ret <4 x i64> %res
}
define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
ret <4 x i64> %res
}
define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
ret <4 x i64> %res
}
declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
declare <16 x half> @llvm.sqrt.v16f16(<16 x half>)
define <8 x half> @test_sqrt_ph_128(<8 x half> %a0) {
; CHECK-LABEL: test_sqrt_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtph %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
ret <8 x half> %1
}
define <8 x half> @test_sqrt_ph_128_fast(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_128_fast:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %xmm0, %xmm0
; CHECK-NEXT: vmulph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
%2 = fdiv fast <8 x half> %a1, %1
ret <8 x half> %2
}
define <8 x half> @test_sqrt_ph_128_fast2(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_128_fast2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtph %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
ret <8 x half> %1
}
define <8 x half> @test_mask_sqrt_ph_128(<8 x half> %a0, <8 x half> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_sqrt_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x half> %1, <8 x half> %passthru
ret <8 x half> %3
}
define <8 x half> @test_maskz_sqrt_ph_128(<8 x half> %a0, i8 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
%2 = bitcast i8 %mask to <8 x i1>
%3 = select <8 x i1> %2, <8 x half> %1, <8 x half> zeroinitializer
ret <8 x half> %3
}
define <16 x half> @test_sqrt_ph_256(<16 x half> %a0) {
; CHECK-LABEL: test_sqrt_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtph %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
ret <16 x half> %1
}
define <16 x half> @test_sqrt_ph_256_fast(<16 x half> %a0, <16 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_256_fast:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %ymm0, %ymm0
; CHECK-NEXT: vmulph %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%1 = call fast <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
%2 = fdiv fast <16 x half> %a1, %1
ret <16 x half> %2
}
define <16 x half> @test_mask_sqrt_ph_256(<16 x half> %a0, <16 x half> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_sqrt_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x half> %1, <16 x half> %passthru
ret <16 x half> %3
}
define <16 x half> @test_maskz_sqrt_ph_256(<16 x half> %a0, i16 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
%2 = bitcast i16 %mask to <16 x i1>
%3 = select <16 x i1> %2, <16 x half> %1, <16 x half> zeroinitializer
ret <16 x half> %3
}
declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128(<8 x half>, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256(<16 x half>, <16 x half>, i16)
define <8 x half> @test_rsqrt_ph_128(<8 x half> %a0) {
; CHECK-LABEL: test_rsqrt_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128(<8 x half> %a0, <8 x half> zeroinitializer, i8 -1)
ret <8 x half> %res
}
define <16 x half> @test_rsqrt_ph_256(<16 x half> %a0) {
; CHECK-LABEL: test_rsqrt_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %ymm0, %ymm0
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256(<16 x half> %a0, <16 x half> zeroinitializer, i16 -1)
ret <16 x half> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128(<8 x half>, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256(<16 x half>, <16 x half>, i16)
define <8 x half> @test_rcp_ph_128(<8 x half> %a0, <8 x half> %a1, i8 %mask) {
; CHECK-LABEL: test_rcp_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrcpph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128(<8 x half> %a0, <8 x half> %a1, i8 %mask)
ret <8 x half> %res
}
define <16 x half> @test_rcp_ph_256(<16 x half> %a0, <16 x half> %a1, i16 %mask) {
; CHECK-LABEL: test_rcp_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrcpph %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256(<16 x half> %a0, <16 x half> %a1, i16 %mask)
ret <16 x half> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half>, i32, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half>, i32, <16 x half>, i16)
define <8 x half>@test_int_x86_avx512_mask_reduce_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vreduceph $8, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vreduceph $4, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3)
%res1 = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1)
%res2 = fadd <8 x half> %res, %res1
ret <8 x half> %res2
}
define <16 x half>@test_int_x86_avx512_mask_reduce_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vreduceph $8, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vreduceph $4, %ymm0, %ymm0
; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3)
%res1 = call <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1)
%res2 = fadd <16 x half> %res, %res1
ret <16 x half> %res2
}
declare <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half>, i32)
declare <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half>, i32)
define i8 @test_int_x86_avx512_fpclass_ph_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassph $2, %xmm0, %k1
; CHECK-NEXT: vfpclassph $4, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%res = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %x0, i32 4)
%res1 = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %x0, i32 2)
%1 = and <8 x i1> %res1, %res
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
define i16 @test_int_x86_avx512_fpclass_ph_256(<16 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassph $2, %ymm0, %k1
; CHECK-NEXT: vfpclassph $4, %ymm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half> %x0, i32 4)
%res1 = call <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half> %x0, i32 2)
%1 = and <16 x i1> %res1, %res
%2 = bitcast <16 x i1> %1 to i16
ret i16 %2
}
declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half>, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half>, <16 x half>, i16)
define <8 x half>@test_int_x86_avx512_getexp_ph_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_getexp_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vgetexpph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> zeroinitializer, i8 -1)
ret <8 x half> %res
}
define <8 x half>@test_int_x86_avx512_mask_getexp_ph_128(<8 x half> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetexpph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> %x1, i8 %x2)
ret <8 x half> %res
}
define <8 x half>@test_int_x86_avx512_maskz_getexp_ph_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_getexp_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetexpph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> zeroinitializer, i8 %x2)
ret <8 x half> %res
}
define <16 x half>@test_int_x86_avx512_getexp_ph_256(<16 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_getexp_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vgetexpph %ymm0, %ymm0
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> zeroinitializer, i16 -1)
ret <16 x half> %res
}
define <16 x half>@test_int_x86_avx512_mask_getexp_ph_256(<16 x half> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetexpph %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> %x1, i16 %x2)
ret <16 x half> %res
}
define <16 x half>@test_int_x86_avx512_maskz_getexp_ph_256(<16 x half> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_getexp_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetexpph %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> zeroinitializer, i16 %x2)
ret <16 x half> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half>, i32, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half>, i32, <16 x half>, i16)
define <8 x half>@test_int_x86_avx512_mask_getmant_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetmantph $8, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vgetmantph $4, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3)
%res1 = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1)
%res2 = fadd <8 x half> %res, %res1
ret <8 x half> %res2
}
define <16 x half>@test_int_x86_avx512_mask_getmant_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetmantph $8, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vgetmantph $4, %ymm0, %ymm0
; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3)
%res1 = call <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1)
%res2 = fadd <16 x half> %res, %res1
ret <16 x half> %res2
}
declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half>, i32, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half>, i32, <16 x half>, i16)
define <8 x half>@test_int_x86_avx512_mask_rndscale_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrndscaleph $8, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vrndscaleph $4, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3)
%res1 = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1)
%res2 = fadd <8 x half> %res, %res1
ret <8 x half> %res2
}
define <16 x half>@test_int_x86_avx512_mask_rndscale_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrndscaleph $8, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vrndscaleph $4, %ymm0, %ymm0
; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3)
%res1 = call <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1)
%res2 = fadd <16 x half> %res, %res1
ret <16 x half> %res2
}
declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half>, <8 x half>, <8 x half>, i8)
declare <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half>, <16 x half>, <16 x half>, i16)
define <8 x half>@test_int_x86_avx512_scalef_ph_128(<8 x half> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_scalef_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> zeroinitializer, i8 -1)
ret <8 x half> %res
}
define <8 x half>@test_int_x86_avx512_mask_scalef_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%mask = bitcast i8 %x3 to <8 x i1>
%res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3)
ret <8 x half> %res
}
define <8 x half>@test_int_x86_avx512_maskz_scalef_ph_128(<8 x half> %x0, <8 x half> %x1, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_scalef_ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%mask = bitcast i8 %x3 to <8 x i1>
%res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> zeroinitializer, i8 %x3)
ret <8 x half> %res
}
define <16 x half>@test_int_x86_avx512_scalef_ph_256(<16 x half> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_scalef_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> zeroinitializer, i16 -1)
ret <16 x half> %res
}
define <16 x half>@test_int_x86_avx512_mask_scalef_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
%mask = bitcast i16 %x3 to <16 x i1>
%res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3)
ret <16 x half> %res
}
define <16 x half>@test_int_x86_avx512_maskz_scalef_ph_256(<16 x half> %x0, <16 x half> %x1, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_scalef_ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%mask = bitcast i16 %x3 to <16 x i1>
%res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> zeroinitializer, i16 %x3)
ret <16 x half> %res
}