; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O3 | FileCheck %s --check-prefixes=AVX,F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64
; Constrained integer-to-half conversion intrinsics exercised by the tests in
; this file. Each one takes the integer operand followed by two metadata
; operands; the call sites below pass the rounding mode and the exception
; behaviour in those two slots.
;
; Signed conversions, one declaration per source width (i1, i8, i16, i32, i64).
declare half @llvm.experimental.constrained.sitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
; Unsigned conversions for the same source widths.
declare half @llvm.experimental.constrained.uitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)
; Strict signed i1 -> half conversion.
; In every configuration the expected code masks the argument to its low bit,
; negates it and sign-extends the byte to 32 bits before converting.
; The sse2 expectations convert to float with cvtsi2ss and then call
; __truncsfhf2; the f16c and avx512f expectations (shared prefix) convert to
; float and narrow with vcvtps2ph; both avx512fp16 configurations convert
; straight to half with vcvtsi2sh.
define half @sitofp_i1tof16(i1 %x) #0 {
; SSE2-LABEL: sitofp_i1tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: andb $1, %dil
; SSE2-NEXT: negb %dil
; SSE2-NEXT: movsbl %dil, %eax
; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_i1tof16:
; AVX: # %bb.0:
; AVX-NEXT: andb $1, %dil
; AVX-NEXT: negb %dil
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: sitofp_i1tof16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i1tof16:
; X64: # %bb.0:
; X64-NEXT: andb $1, %dil
; X64-NEXT: negb %dil
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
; The conversion uses the dynamic rounding mode and strict exception semantics.
%result = call half @llvm.experimental.constrained.sitofp.f16.i1(i1 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}
; Strict signed i8 -> half conversion.
; The expected code sign-extends the byte argument to 32 bits (movsbl) and then
; converts. The sse2 expectations go through cvtsi2ss plus the __truncsfhf2
; libcall, the f16c/avx512f expectations through vcvtsi2ss plus vcvtps2ph, and
; the avx512fp16 expectations use vcvtsi2sh directly.
define half @sitofp_i8tof16(i8 %x) #0 {
; SSE2-LABEL: sitofp_i8tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movsbl %dil, %eax
; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_i8tof16:
; AVX: # %bb.0:
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: sitofp_i8tof16:
; X86: # %bb.0:
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i8tof16:
; X64: # %bb.0:
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
; The conversion uses the dynamic rounding mode and strict exception semantics.
%result = call half @llvm.experimental.constrained.sitofp.f16.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}
; Strict signed i16 -> half conversion.
; The expected code sign-extends the 16-bit argument to 32 bits (movswl) and
; then converts. The sse2 expectations go through cvtsi2ss plus the
; __truncsfhf2 libcall, the f16c/avx512f expectations through vcvtsi2ss plus
; vcvtps2ph, and the avx512fp16 expectations use vcvtsi2sh directly.
define half @sitofp_i16tof16(i16 %x) #0 {
; SSE2-LABEL: sitofp_i16tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movswl %di, %eax
; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_i16tof16:
; AVX: # %bb.0:
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: sitofp_i16tof16:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i16tof16:
; X64: # %bb.0:
; X64-NEXT: movswl %di, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
; The conversion uses the dynamic rounding mode and strict exception semantics.
%result = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}
; Strict signed i32 -> half conversion.
; No extension is needed, so the expected code converts the argument as is.
; The sse2 expectations use cvtsi2ss plus the __truncsfhf2 libcall, the
; f16c/avx512f expectations use vcvtsi2ss plus vcvtps2ph, and the avx512fp16
; expectations use vcvtsi2sh (with a stack memory operand on the 32-bit target).
define half @sitofp_i32tof16(i32 %x) #0 {
; SSE2-LABEL: sitofp_i32tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: cvtsi2ss %edi, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_i32tof16:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: sitofp_i32tof16:
; X86: # %bb.0:
; X86-NEXT: vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i32tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT: retq
; The conversion uses the dynamic rounding mode and strict exception semantics.
%result = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}
; Strict signed i64 -> half conversion.
; The 64-bit targets convert the 64-bit register directly: cvtsi2ss plus the
; __truncsfhf2 libcall for sse2, vcvtsi2ss plus vcvtps2ph for f16c/avx512f, and
; vcvtsi2sh for avx512fp16. The 32-bit avx512fp16 expectations instead load the
; value into an xmm register with vmovsd and use the packed vcvtqq2ph.
define half @sitofp_i64tof16(i64 %x) #0 {
; SSE2-LABEL: sitofp_i64tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: cvtsi2ss %rdi, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_i64tof16:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: sitofp_i64tof16:
; X86: # %bb.0:
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vcvtqq2ph %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i64tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm0
; X64-NEXT: retq
; The conversion uses the dynamic rounding mode and strict exception semantics.
%result = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}
; Strict unsigned i1 -> half conversion.
; The expected code masks the argument to its low bit and then uses the signed
; convert instruction on that masked value. The sse2 expectations go through
; cvtsi2ss plus the __truncsfhf2 libcall, the f16c/avx512f expectations through
; vcvtsi2ss plus vcvtps2ph, and the avx512fp16 expectations use vcvtsi2sh.
define half @uitofp_i1tof16(i1 %x) #0 {
; SSE2-LABEL: uitofp_i1tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: andl $1, %edi
; SSE2-NEXT: cvtsi2ss %edi, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: uitofp_i1tof16:
; AVX: # %bb.0:
; AVX-NEXT: andl $1, %edi
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: uitofp_i1tof16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i1tof16:
; X64: # %bb.0:
; X64-NEXT: andl $1, %edi
; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT: retq
; The conversion uses the dynamic rounding mode and strict exception semantics.
%result = call half @llvm.experimental.constrained.uitofp.f16.i1(i1 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}
; Strict unsigned i8 -> half conversion.
; The expected code zero-extends the byte argument to 32 bits (movzbl) and then
; uses the signed convert instruction on the extended value. The sse2
; expectations go through cvtsi2ss plus the __truncsfhf2 libcall, the
; f16c/avx512f expectations through vcvtsi2ss plus vcvtps2ph, and the
; avx512fp16 expectations use vcvtsi2sh.
define half @uitofp_i8tof16(i8 %x) #0 {
; SSE2-LABEL: uitofp_i8tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movzbl %dil, %eax
; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: uitofp_i8tof16:
; AVX: # %bb.0:
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: uitofp_i8tof16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i8tof16:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
; The conversion uses the dynamic rounding mode and strict exception semantics.
%result = call half @llvm.experimental.constrained.uitofp.f16.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}
; Strict unsigned i16 -> half conversion.
; The expected code zero-extends the 16-bit argument to 32 bits (movzwl) and
; then uses the signed convert instruction on the extended value. The sse2
; expectations go through cvtsi2ss plus the __truncsfhf2 libcall, the
; f16c/avx512f expectations through vcvtsi2ss plus vcvtps2ph, and the
; avx512fp16 expectations use vcvtsi2sh.
define half @uitofp_i16tof16(i16 %x) #0 {
; SSE2-LABEL: uitofp_i16tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movzwl %di, %eax
; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: uitofp_i16tof16:
; AVX: # %bb.0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: uitofp_i16tof16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i16tof16:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
; The conversion uses the dynamic rounding mode and strict exception semantics.
%result = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}
; Strict unsigned i32 -> half conversion.
; This is the first test where the f16c and avx512f expectations differ, so
; they are checked under separate prefixes instead of the shared one.
; The sse2 and f16c expectations zero-extend the argument to 64 bits with a
; 32-bit movl and then use the 64-bit signed convert; the avx512f expectations
; use the unsigned vcvtusi2ss. Narrowing to half is the __truncsfhf2 libcall
; for sse2 and vcvtps2ph for f16c/avx512f. The avx512fp16 expectations use
; vcvtusi2sh directly.
define half @uitofp_i32tof16(i32 %x) #0 {
; SSE2-LABEL: uitofp_i32tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movl %edi, %eax
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; F16C-LABEL: uitofp_i32tof16:
; F16C: # %bb.0:
; F16C-NEXT: movl %edi, %eax
; F16C-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; F16C-NEXT: retq
;
; AVX512-LABEL: uitofp_i32tof16:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; X86-LABEL: uitofp_i32tof16:
; X86: # %bb.0:
; X86-NEXT: vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i32tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtusi2sh %edi, %xmm0, %xmm0
; X64-NEXT: retq
; The conversion uses the dynamic rounding mode and strict exception semantics.
%result = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}
; Strict unsigned i64 -> half conversion.
; The sse2 and f16c expectations emulate the unsigned convert with the signed
; one. They build (x >> 1) | (x & 1) with shrq/andl/orq, keep the original x
; instead when testq shows its sign bit is clear (cmovnsq), convert the chosen
; value as signed, and use the doubled result (addss / vaddss of the value with
; itself) only for inputs whose sign bit was set.
; The two expectations differ in shape. For sse2 the sum is computed
; unconditionally and js skips the movaps that restores the undoubled value;
; for f16c jns skips the vaddss.
; The avx512f expectations use the unsigned vcvtusi2ss. Narrowing to half is
; the __truncsfhf2 libcall for sse2 and vcvtps2ph for f16c/avx512f.
; The 64-bit avx512fp16 expectations use vcvtusi2sh directly, while the 32-bit
; ones load the value with vmovsd and use the packed vcvtuqq2ph.
define half @uitofp_i64tof16(i64 %x) #0 {
; SSE2-LABEL: uitofp_i64tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: movq %rdi, %rax
; SSE2-NEXT: shrq %rax
; SSE2-NEXT: movl %edi, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: orq %rax, %rcx
; SSE2-NEXT: testq %rdi, %rdi
; SSE2-NEXT: cmovnsq %rdi, %rcx
; SSE2-NEXT: cvtsi2ss %rcx, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: js .LBB9_2
; SSE2-NEXT: # %bb.1:
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: .LBB9_2:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; F16C-LABEL: uitofp_i64tof16:
; F16C: # %bb.0:
; F16C-NEXT: movq %rdi, %rax
; F16C-NEXT: shrq %rax
; F16C-NEXT: movl %edi, %ecx
; F16C-NEXT: andl $1, %ecx
; F16C-NEXT: orq %rax, %rcx
; F16C-NEXT: testq %rdi, %rdi
; F16C-NEXT: cmovnsq %rdi, %rcx
; F16C-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
; F16C-NEXT: jns .LBB9_2
; F16C-NEXT: # %bb.1:
; F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
; F16C-NEXT: .LBB9_2:
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; F16C-NEXT: retq
;
; AVX512-LABEL: uitofp_i64tof16:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; X86-LABEL: uitofp_i64tof16:
; X86: # %bb.0:
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vcvtuqq2ph %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i64tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm0
; X64-NEXT: retq
; The conversion uses the dynamic rounding mode and strict exception semantics.
%result = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}
; Attribute group #0, referenced by the test functions and by their intrinsic
; call sites. strictfp marks them as strict floating-point code, which the
; constrained intrinsics require; nounwind states that they do not unwind.
attributes #0 = { strictfp nounwind }