; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s
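; Rounding-control immediates passed as the trailing i32 of the rounded
; intrinsics below: 4 = use MXCSR (CUR_DIRECTION), 8 = {rn-sae}/{sae},
; 9 = {rd-sae}, 10 = {ru-sae}, 11 = {rz-sae}.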
declare i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half>, <8 x half>, i32, i32)
define i32 @test_x86_avx512fp16_ucomi_sh_lt(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_x86_avx512fp16_ucomi_sh_lt:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpngesh %xmm1, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %a0, <8 x half> %a1, i32 9, i32 4)
ret i32 %res
}
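; Packed f16 square root: generic llvm.sqrt lowering, fast-math reciprocal
; estimate expansions, merge/zero masking, and embedded rounding.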
declare <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half>, i32) nounwind readnone
define <32 x half> @test_sqrt_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_sqrt_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtph %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
ret <32 x half> %1
}
define <32 x half> @test_sqrt_ph_512_fast(<32 x half> %a0, <32 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_512_fast:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %zmm0, %zmm0
; CHECK-NEXT: vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = fdiv fast <32 x half> %a1, %1
ret <32 x half> %2
}
define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrt" {
; CHECK-LABEL: test_sqrt_ph_512_fast_estimate_attribute:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %zmm0, %zmm0
; CHECK-NEXT: vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = fdiv fast <32 x half> %a1, %1
ret <32 x half> %2
}
define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute_2(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrth:1" {
; CHECK-LABEL: test_sqrt_ph_512_fast_estimate_attribute_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %zmm0, %zmm2
; CHECK-NEXT: vmulph %zmm2, %zmm0, %zmm0
; CHECK-NEXT: vfmadd213ph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm0
; CHECK-NEXT: vmulph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm2
; CHECK-NEXT: vmulph %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = fdiv fast <32 x half> %a1, %1
ret <32 x half> %2
}
define <32 x half> @test_mask_sqrt_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_sqrt_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = bitcast i32 %mask to <32 x i1>
%3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru
ret <32 x half> %3
}
define <32 x half> @test_maskz_sqrt_ph_512(<32 x half> %a0, i32 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = bitcast i32 %mask to <32 x i1>
%3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
ret <32 x half> %3
}
declare <32 x half> @llvm.sqrt.v32f16(<32 x half>)
define <32 x half> @test_sqrt_round_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_sqrt_round_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtph {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
ret <32 x half> %1
}
define <32 x half> @test_mask_sqrt_round_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_sqrt_round_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph {rz-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
%2 = bitcast i32 %mask to <32 x i1>
%3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru
ret <32 x half> %3
}
define <32 x half> @test_maskz_sqrt_round_ph_512(<32 x half> %a0, i32 %mask) {
; CHECK-LABEL: test_maskz_sqrt_round_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtph {rz-sae}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
%2 = bitcast i32 %mask to <32 x i1>
%3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
ret <32 x half> %3
}
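; Scalar f16 square root (vsqrtsh, and vrsqrtsh under fast-math), with masking
; and embedded rounding.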
declare <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32) nounwind readnone
define <8 x half> @test_sqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 4)
ret <8 x half> %res
}
define half @test_sqrt_sh2(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh2:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmulsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = call fast half @llvm.sqrt.f16(half %a0)
%2 = fdiv fast half %a1, %1
ret half %2
}
define half @test_sqrt_sh3(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = call fast half @llvm.sqrt.f16(half %a0)
ret half %1
}
declare half @llvm.sqrt.f16(half)
define <8 x half> @test_sqrt_sh_r(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtsh {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 10)
ret <8 x half> %res
}
define <8 x half> @test_sqrt_sh_nomask(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
; CHECK-LABEL: test_sqrt_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 -1, i32 4)
ret <8 x half> %res
}
define <8 x half> @test_sqrt_sh_z(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsqrtsh {ru-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 %mask, i32 10)
ret <8 x half> %res
}
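; Reciprocal square-root estimate (vrsqrtph/vrsqrtsh), including a folded
; memory operand and merge/zero masking.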
declare <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half>, <32 x half>, i32)
declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8)
define <32 x half> @test_rsqrt_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_rsqrt_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtph %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half> %a0, <32 x half> zeroinitializer, i32 -1)
ret <32 x half> %res
}
define <8 x half> @test_rsqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
; CHECK-LABEL: test_rsqrt_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> %a2, i8 -1)
ret <8 x half> %res
}
define <8 x half> @test_rsqrt_sh_load(<8 x half> %a0, ptr %a1ptr) {
; CHECK-LABEL: test_rsqrt_sh_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vrsqrtsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%a1 = load <8 x half>, ptr %a1ptr
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> undef, i8 -1)
ret <8 x half> %res
}
define <8 x half> @test_rsqrt_sh_maskz(<8 x half> %a0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_sh_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrsqrtsh %xmm0, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 %mask)
ret <8 x half> %res
}
define <8 x half> @test_rsqrt_sh_mask(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_sh_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrsqrtsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask)
ret <8 x half> %res
}
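; vfpclassph/vfpclasssh: the two fpclass results are combined by feeding the
; first mask into the second instruction's write-mask.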
declare <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half>, i32)
define i32 @test_int_x86_avx512_fpclass_ph_512(<32 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassph $2, %zmm0, %k1
; CHECK-NEXT: vfpclassph $4, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 4)
%res1 = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 2)
%1 = and <32 x i1> %res1, %res
%2 = bitcast <32 x i1> %1 to i32
ret i32 %2
}
declare i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half>, i32, i8)
define i8 @test_int_x86_avx512_mask_fpclass_sh(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasssh $4, %xmm0, %k1
; CHECK-NEXT: vfpclasssh $2, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 %res)
ret i8 %res1
}
define i8 @test_int_x86_avx512_mask_fpclass_sh_load(ptr %x0ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasssh $4, (%rdi), %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%x0 = load <8 x half>, ptr %x0ptr
%res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 -1)
ret i8 %res
}
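; Reciprocal estimate (vrcpph/vrcpsh), masked and with a folded load.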
declare <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half>, <32 x half>, i32)
define <32 x half> @test_rcp_ph_512(<32 x half> %a0, <32 x half> %a1, i32 %mask) {
; CHECK-LABEL: test_rcp_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrcpph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half> %a0, <32 x half> %a1, i32 %mask)
ret <32 x half> %res
}
declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half>, <8 x half>, <8 x half>, i8)
define <8 x half> @test_rcp_sh(<8 x half> %a0) {
; CHECK-LABEL: test_rcp_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vrcpsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 -1)
ret <8 x half> %res
}
define <8 x half> @test_rcp_sh_load(<8 x half> %a0, ptr %a1ptr) {
; CHECK-LABEL: test_rcp_sh_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vrcpsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%a1 = load <8 x half>, ptr %a1ptr
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 -1)
ret <8 x half> %res
}
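; vreduceph/vreducesh: imm8 control, write-mask, and {sae} variants.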
declare <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half>, i32, <32 x half>, i32, i32)
define <32 x half> @test_int_x86_avx512_mask_reduce_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vreduceph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vreduceph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
%res1 = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
%res2 = fadd <32 x half> %res, %res1
ret <32 x half> %res2
}
declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32, i32)
define <8 x half> @test_int_x86_avx512_mask_reduce_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vreducesh $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_reduce_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vreducesh $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8)
ret <8 x half> %res
}
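; vrndscaleph/vrndscalesh: imm8 control, write-mask, and {sae} variants.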
declare <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half>, i32, <32 x half>, i32, i32)
define <32 x half> @test_int_x86_avx512_mask_rndscale_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrndscaleph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vrndscaleph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
%res1 = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
%res2 = fadd <32 x half> %res, %res1
ret <32 x half> %res2
}
declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32, i32)
define <8 x half> @test_int_x86_avx512_mask_rndscale_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vrndscalesh $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_rndscale_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vrndscalesh $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8)
ret <8 x half> %res
}
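; vgetexpph/vgetexpsh, including {sae} and a folded load.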
declare <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half>, <32 x half>, i32, i32)
define <32 x half> @test_int_x86_avx512_mask_getexp_ph_512(<32 x half> %x0, <32 x half> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetexpph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vgetexpph {sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> %x1, i32 %x2, i32 4)
%res2 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> zeroinitializer, i32 -1, i32 8)
%res3 = fadd <32 x half> %res1, %res2
ret <32 x half> %res3
}
declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32)
define <8 x half> @test_int_x86_avx512_mask_getexp_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetexpsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_getexp_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vgetexpsh {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_getexp_sh_load(<8 x half> %x0, ptr %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vgetexpsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%x1 = load <8 x half>, ptr %x1ptr
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
ret <8 x half> %res
}
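; vgetmantph/vgetmantsh: imm8 control with merge, zero, and no masking.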
declare <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half>, i32, <32 x half>, i32, i32)
define <32 x half> @test_int_x86_avx512_mask_getmant_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetmantph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vgetmantph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
%res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
%res2 = fadd <32 x half> %res, %res1
ret <32 x half> %res2
}
declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half>, <8 x half>, i32, <8 x half>, i8, i32)
define <8 x half> @test_int_x86_avx512_mask_getmant_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetmantsh $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 %x4, i32 4)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_getmant_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vgetmantsh $11, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 -1, i32 4)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_getmant_sh_z(<8 x half> %x0, <8 x half> %x1, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgetmantsh $11, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> zeroinitializer, i8 %x4, i32 4)
ret <8 x half> %res
}
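; vscalefph/vscalefsh with embedded rounding, masking, and a folded load.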
declare <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half>, <32 x half>, <32 x half>, i32, i32)
define <32 x half> @test_int_x86_avx512_mask_scalef_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vscalefph {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vscalefph {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddph %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%mask = bitcast i32 %x3 to <32 x i1>
%res1 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3, i32 11)
%res2 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> zeroinitializer, i32 -1, i32 8)
%res3 = fadd <32 x half> %res1, %res2
ret <32 x half> %res3
}
declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32)
define <8 x half> @test_int_x86_avx512_mask_scalef_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vscalefsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_scalef_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vscalefsh {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_scalef_sh_load(<8 x half> %x0, ptr %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vscalefsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%x1 = load <8 x half>, ptr %x1ptr
%res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
ret <8 x half> %res
}
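; Masked scalar arithmetic. Each test exercises the unmasked, merge-masked,
; zero-masked, and folded-load forms of the corresponding *sh instruction.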
declare <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vaddsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vaddsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
  %val.half = load half, ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
ret <8 x half> %res3
}
declare <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsubsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vsubsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
  %val.half = load half, ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
ret <8 x half> %res3
}
declare <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vmulsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vmulsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vmulsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vmulsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
  %val.half = load half, ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
ret <8 x half> %res3
}
declare <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vdivsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vdivsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
  %val.half = load half, ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
ret <8 x half> %res3
}
declare <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_min_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vminsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vminsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
  %val.half = load half, ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
ret <8 x half> %res3
}
declare <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_max_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vmaxsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vmaxsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
  %val.half = load half, ptr %ptr
%val = insertelement <8 x half> undef, half %val.half, i32 0
%res0 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
ret <8 x half> %res3
}
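; Scalar compare into a mask register (vcmpsh): predicate immediates, {sae},
; and write-masking.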
declare i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half>, <8 x half>, i32, i8, i32)
define i8 @test_int_x86_avx512_mask_cmp_sh(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sh:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 %x3, i32 4)
ret i8 %res2
}
define i8 @test_int_x86_avx512_mask_cmp_sh_all(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sh_all:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcmplesh %xmm1, %xmm0, %k0
; CHECK-NEXT: kmovd %k0, %ecx
; CHECK-NEXT: vcmpunordsh {sae}, %xmm1, %xmm0, %k0
; CHECK-NEXT: kmovd %k0, %edx
; CHECK-NEXT: vcmpneqsh %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %esi
; CHECK-NEXT: vcmpnltsh {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: andb %cl, %dl
; CHECK-NEXT: andb %sil, %al
; CHECK-NEXT: andb %dl, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%res1 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 2, i8 -1, i32 4)
%res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 -1, i32 8)
%res3 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 4, i8 %x3, i32 4)
%res4 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 5, i8 %x3, i32 8)
%res11 = and i8 %res1, %res2
%res12 = and i8 %res3, %res4
%res13 = and i8 %res11, %res12
ret i8 %res13
}
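; Dword to half conversions (vcvtdq2ph/vcvtudq2ph), plus the generic
; sitofp/uitofp lowerings.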
declare <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32>, i32)
define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%mask = bitcast i16 %x2 to <16 x i1>
%res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
%res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
ret <16 x half> %res
}
define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%mask = bitcast i16 %x2 to <16 x i1>
%res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
%res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
ret <16 x half> %res
}
define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
ret <16 x half> %res
}
define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%mask = bitcast i16 %x2 to <16 x i1>
%res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
%res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
ret <16 x half> %res
}
define <16 x half> @sint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: sint_to_fp_16i32_to_16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT: retq
%res = sitofp <16 x i32> %x to <16 x half>
ret <16 x half> %res
}
declare <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32>, i32)
define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
%mask = bitcast i16 %x2 to <16 x i1>
%res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
%res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
ret <16 x half> %res
}
define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT: retq
%res = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
ret <16 x half> %res
}
define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%mask = bitcast i16 %x2 to <16 x i1>
%res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
%res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
ret <16 x half> %res
}
define <16 x half> @uint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: uint_to_fp_16i32_to_16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT: retq
%res = uitofp <16 x i32> %x to <16 x half>
ret <16 x half> %res
}
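; Half to dword conversions, rounding (vcvtph2dq/vcvtph2udq) and truncating
; (vcvttph2dq/vcvttph2udq).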
declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half>, <16 x i32>, i16, i32)
define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtph2dq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
%res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half>, <16 x i32>, i16, i32)
define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtph2udq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
%res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half>, <16 x i32>, i16, i32)
define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttph2dq {sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
%res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half>, <16 x i32>, i16, i32)
define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttph2udq {sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
%res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
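; Qword/half conversions: vcvtqq2ph/vcvtuqq2ph and
; vcvtph2qq/vcvtph2uqq/vcvttph2uqq.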
declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64>, i32)
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
ret <8 x half> %res
}
declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64>, i32)
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
ret <8 x half> %res
}
define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%mask = bitcast i8 %x2 to <8 x i1>
%res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
%res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
ret <8 x half> %res
}
declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half>, <8 x i64>, i8, i32)
define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2qq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2qq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2qq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtph2qq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
%res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half>, <8 x i64>, i8, i32)
define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2uqq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uqq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtph2uqq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
%res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half>, <8 x i64>, i8, i32)
define <8 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uqq {sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttph2uqq %xmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 8)
%res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 4)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
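; Scalar half to integer conversions, rounding and truncating, 32- and 64-bit,
; signed and unsigned.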
declare i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half>, i32)
define i32 @test_x86_avx512fp16_vcvtsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsh2si %xmm0, %ecx
; CHECK-NEXT: vcvtsh2si {rz-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 4)
%res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 11)
%res = add i32 %res1, %res2
ret i32 %res
}
declare i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half>, i32)
define i64 @test_x86_avx512fp16_vcvtsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsh2si %xmm0, %rcx
; CHECK-NEXT: vcvtsh2si {ru-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 4)
%res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 10)
%res = add i64 %res1, %res2
ret i64 %res
}
declare i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half>, i32)
define i32 @test_x86_avx512fp16_vcvttsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttsh2si %xmm0, %ecx
; CHECK-NEXT: vcvttsh2si {sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 4)
%res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 8)
%res = add i32 %res1, %res2
ret i32 %res
}
declare i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half>, i32)
define i64 @test_x86_avx512fp16_vcvttsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttsh2si %xmm0, %rcx
; CHECK-NEXT: vcvttsh2si {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 4)
%res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 8)
%res = add i64 %res1, %res2
ret i64 %res
}
declare i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half>, i32)
define i32 @test_x86_avx512fp16_vcvtsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsh2usi %xmm0, %ecx
; CHECK-NEXT: vcvtsh2usi {rd-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 4)
%res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 9)
%res = add i32 %res1, %res2
ret i32 %res
}
declare i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half>, i32)
define i64 @test_x86_avx512fp16_vcvtsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsh2usi %xmm0, %rcx
; CHECK-NEXT: vcvtsh2usi {ru-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 4)
%res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 10)
%res = add i64 %res1, %res2
ret i64 %res
}
declare i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half>, i32)
define i32 @test_x86_avx512fp16_vcvttsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttsh2usi %xmm0, %ecx
; CHECK-NEXT: vcvttsh2usi {sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
%res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 4)
%res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 8)
%res = add i32 %res1, %res2
ret i32 %res
}
declare i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half>, i32)
define i64 @test_x86_avx512fp16_vcvttsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttsh2usi %xmm0, %rcx
; CHECK-NEXT: vcvttsh2usi {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
%res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 4)
%res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 8)
%res = add i64 %res1, %res2
ret i64 %res
}
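; Integer to scalar half conversions (vcvtsi2sh/vcvtusi2sh), 32- and 64-bit,
; with embedded rounding.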
declare <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half>, i32, i32)
define <8 x half> @test_x86_avx512fp16_vcvtsi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi2sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT: vcvtsi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
%res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
%res = fadd <8 x half> %res1, %res2
ret <8 x half> %res
}
declare <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half>, i64, i32)
define <8 x half> @test_x86_avx512fp16_vcvtsi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi642sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT: vcvtsi2sh %rdi, {rn-sae}, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
%res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 8)
%res = fadd <8 x half> %res1, %res2
ret <8 x half> %res
}
declare <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half>, i32, i32)
define <8 x half> @test_x86_avx512fp16_vcvtusi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi2sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtusi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT: vcvtusi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
%res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
%res = fadd <8 x half> %res1, %res2
ret <8 x half> %res
}
declare <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half>, i64, i32)
define <8 x half> @test_x86_avx512fp16_vcvtusi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi642sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT: vcvtusi2sh %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
%res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 9)
%res = fadd <8 x half> %res1, %res2
ret <8 x half> %res
}
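; Cast-with-freeze widening: the 128-to-256 and 256-to-512 cases fold to a
; register-class copy, while the 128-to-512 case is currently lowered with a
; vmovaps that defines the upper bits.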
define <16 x half> @test_mm256_castph128_ph256_freeze(<8 x half> %a0) nounwind {
; CHECK-LABEL: test_mm256_castph128_ph256_freeze:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: retq
%a1 = freeze <8 x half> poison
%res = shufflevector <8 x half> %a0, <8 x half> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x half> %res
}
define <32 x half> @test_mm512_castph128_ph512_freeze(<8 x half> %a0) nounwind {
; CHECK-LABEL: test_mm512_castph128_ph512_freeze:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, %xmm0
; CHECK-NEXT: retq
%a1 = freeze <8 x half> poison
%res = shufflevector <8 x half> %a0, <8 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <32 x half> %res
}
define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind {
; CHECK-LABEL: test_mm512_castph256_ph512_freeze:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT: retq
%a1 = freeze <16 x half> poison
%res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <32 x half> %res
}