llvm/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2  -O3 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c  -O3 | FileCheck %s --check-prefixes=AVX,F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f  -O3 | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64

; Constrained (strict-FP) integer-to-half conversion intrinsics exercised by
; this test: the signed (sitofp) and unsigned (uitofp) variants for i1, i8,
; i16, i32 and i64 sources. At the call sites in this file the first metadata
; operand carries the rounding mode and the second the exception behaviour.
declare half @llvm.experimental.constrained.sitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)

; Strict-FP signed conversion of an i1 to half through the constrained sitofp
; intrinsic (dynamic rounding, strict exception behaviour).
; Because the source is a signed i1, every expected sequence first turns bit 0
; into 0 or -1 (andb $1 / negb / movsbl) before the integer-to-FP convert.
; Expected lowering per llc invocation:
;   +sse2             - cvtsi2ss to float, then a __truncsfhf2 libcall.
;   +f16c, +avx512f   - vcvtsi2ss, upper lanes blended with zero, then
;                       vcvtps2ph $4 (presumably the zeroing keeps the packed
;                       convert from seeing stale upper elements - confirm).
;   +avx512fp16       - a single vcvtsi2sh on both i686 and x86_64.
define half @sitofp_i1tof16(i1 %x) #0 {
; SSE2-LABEL: sitofp_i1tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    andb $1, %dil
; SSE2-NEXT:    negb %dil
; SSE2-NEXT:    movsbl %dil, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i1tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    andb $1, %dil
; AVX-NEXT:    negb %dil
; AVX-NEXT:    movsbl %dil, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i1tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andb $1, %al
; X86-NEXT:    negb %al
; X86-NEXT:    movsbl %al, %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i1tof16:
; X64:       # %bb.0:
; X64-NEXT:    andb $1, %dil
; X64-NEXT:    negb %dil
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i1(i1 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

; Strict-FP signed conversion of an i8 to half through the constrained sitofp
; intrinsic (dynamic rounding, strict exception behaviour).
; Every expected sequence sign-extends the byte to 32 bits with movsbl before
; converting. Expected lowering per llc invocation:
;   +sse2             - cvtsi2ss to float, then a __truncsfhf2 libcall.
;   +f16c, +avx512f   - vcvtsi2ss, upper lanes blended with zero, vcvtps2ph $4.
;   +avx512fp16       - a single vcvtsi2sh (the i686 run sign-extends directly
;                       from the stack slot).
define half @sitofp_i8tof16(i8 %x) #0 {
; SSE2-LABEL: sitofp_i8tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movsbl %dil, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i8tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movsbl %dil, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i8tof16:
; X86:       # %bb.0:
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i8tof16:
; X64:       # %bb.0:
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i8(i8 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

; Strict-FP signed conversion of an i16 to half through the constrained sitofp
; intrinsic (dynamic rounding, strict exception behaviour).
; Every expected sequence sign-extends the word to 32 bits with movswl before
; converting. Expected lowering per llc invocation:
;   +sse2             - cvtsi2ss to float, then a __truncsfhf2 libcall.
;   +f16c, +avx512f   - vcvtsi2ss, upper lanes blended with zero, vcvtps2ph $4.
;   +avx512fp16       - a single vcvtsi2sh (the i686 run sign-extends directly
;                       from the stack slot).
define half @sitofp_i16tof16(i16 %x) #0 {
; SSE2-LABEL: sitofp_i16tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movswl %di, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i16tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movswl %di, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i16tof16:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i16tof16:
; X64:       # %bb.0:
; X64-NEXT:    movswl %di, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

; Strict-FP signed conversion of an i32 to half through the constrained sitofp
; intrinsic (dynamic rounding, strict exception behaviour).
; The source already matches the 32-bit convert operand, so no extension is
; expected. Expected lowering per llc invocation:
;   +sse2             - cvtsi2ss from %edi, then a __truncsfhf2 libcall.
;   +f16c, +avx512f   - vcvtsi2ss, upper lanes blended with zero, vcvtps2ph $4.
;   +avx512fp16       - a single vcvtsi2sh; the i686 run folds the stack load
;                       into the convert (vcvtsi2shl with a memory operand).
define half @sitofp_i32tof16(i32 %x) #0 {
; SSE2-LABEL: sitofp_i32tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    cvtsi2ss %edi, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i32tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i32tof16:
; X86:       # %bb.0:
; X86-NEXT:    vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i32tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

; Strict-FP signed conversion of an i64 to half through the constrained sitofp
; intrinsic (dynamic rounding, strict exception behaviour).
; Expected lowering per llc invocation:
;   +sse2             - 64-bit cvtsi2ss from %rdi, then a __truncsfhf2 libcall.
;   +f16c, +avx512f   - 64-bit vcvtsi2ss, upper lanes blended with zero, then
;                       vcvtps2ph $4.
;   +avx512fp16       - x86_64 uses a single vcvtsi2sh from %rdi. The i686 run
;                       has no 64-bit GPR source, so the value is loaded into
;                       an xmm register with vmovsd and converted with the
;                       packed vcvtqq2ph.
define half @sitofp_i64tof16(i64 %x) #0 {
; SSE2-LABEL: sitofp_i64tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    cvtsi2ss %rdi, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i64tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i64tof16:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcvtqq2ph %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i64tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

; Strict-FP unsigned conversion of an i1 to half through the constrained uitofp
; intrinsic (dynamic rounding, strict exception behaviour).
; The input is masked down to bit 0 (andl $1, or andb $1 plus movzbl on i686),
; so the value is 0 or 1 and the expected sequences use the signed convert
; instructions. Expected lowering per llc invocation:
;   +sse2             - cvtsi2ss to float, then a __truncsfhf2 libcall.
;   +f16c, +avx512f   - vcvtsi2ss, upper lanes blended with zero, vcvtps2ph $4.
;   +avx512fp16       - a single vcvtsi2sh on both i686 and x86_64.
define half @uitofp_i1tof16(i1 %x) #0 {
; SSE2-LABEL: uitofp_i1tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    andl $1, %edi
; SSE2-NEXT:    cvtsi2ss %edi, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: uitofp_i1tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    andl $1, %edi
; AVX-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: uitofp_i1tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andb $1, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i1tof16:
; X64:       # %bb.0:
; X64-NEXT:    andl $1, %edi
; X64-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i1(i1 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

; Strict-FP unsigned conversion of an i8 to half through the constrained uitofp
; intrinsic (dynamic rounding, strict exception behaviour).
; Every expected sequence zero-extends the byte to 32 bits with movzbl, which
; makes the value non-negative, and then uses the signed convert instruction.
; Expected lowering per llc invocation:
;   +sse2             - cvtsi2ss to float, then a __truncsfhf2 libcall.
;   +f16c, +avx512f   - vcvtsi2ss, upper lanes blended with zero, vcvtps2ph $4.
;   +avx512fp16       - a single vcvtsi2sh (the i686 run zero-extends directly
;                       from the stack slot).
define half @uitofp_i8tof16(i8 %x) #0 {
; SSE2-LABEL: uitofp_i8tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movzbl %dil, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: uitofp_i8tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl %dil, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: uitofp_i8tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i8tof16:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i8(i8 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

; Strict-FP unsigned conversion of an i16 to half through the constrained
; uitofp intrinsic (dynamic rounding, strict exception behaviour).
; Every expected sequence zero-extends the word to 32 bits with movzwl, which
; makes the value non-negative, and then uses the signed convert instruction.
; Expected lowering per llc invocation:
;   +sse2             - cvtsi2ss to float, then a __truncsfhf2 libcall.
;   +f16c, +avx512f   - vcvtsi2ss, upper lanes blended with zero, vcvtps2ph $4.
;   +avx512fp16       - a single vcvtsi2sh (the i686 run zero-extends directly
;                       from the stack slot).
define half @uitofp_i16tof16(i16 %x) #0 {
; SSE2-LABEL: uitofp_i16tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movzwl %di, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: uitofp_i16tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl %di, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: uitofp_i16tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i16tof16:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

; Strict-FP unsigned conversion of an i32 to half through the constrained
; uitofp intrinsic (dynamic rounding, strict exception behaviour).
; This is the first function where the +f16c and +avx512f runs diverge, so
; they are checked under their own prefixes instead of the shared one.
; Expected lowering per llc invocation:
;   +sse2        - movl %edi, %eax zero-extends to 64 bits, then the 64-bit
;                  signed cvtsi2ss from %rax and a __truncsfhf2 libcall.
;   +f16c        - the same zero-extend plus 64-bit vcvtsi2ss, then upper lanes
;                  blended with zero and vcvtps2ph $4.
;   +avx512f     - the unsigned convert vcvtusi2ss straight from %edi, then
;                  upper lanes blended with zero and vcvtps2ph $4.
;   +avx512fp16  - a single vcvtusi2sh; the i686 run folds the stack load into
;                  the convert (vcvtusi2shl with a memory operand).
define half @uitofp_i32tof16(i32 %x) #0 {
; SSE2-LABEL: uitofp_i32tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movl %edi, %eax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; F16C-LABEL: uitofp_i32tof16:
; F16C:       # %bb.0:
; F16C-NEXT:    movl %edi, %eax
; F16C-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
; F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vmovd %xmm0, %eax
; F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; F16C-NEXT:    retq
;
; AVX512-LABEL: uitofp_i32tof16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; X86-LABEL: uitofp_i32tof16:
; X86:       # %bb.0:
; X86-NEXT:    vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i32tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

; Strict-FP unsigned conversion of an i64 to half through the constrained
; uitofp intrinsic (dynamic rounding, strict exception behaviour).
; The +sse2 and +f16c runs have no unsigned 64-bit convert available and
; expect a multi-instruction expansion built around the signed convert.
; Expected lowering per llc invocation:
;   +sse2        - shrq halves the input and the shifted-out low bit is ORed
;                  back in (andl $1 / orq), presumably as a sticky bit so the
;                  final rounding is unaffected - confirm. testq / cmovnsq
;                  keep the original value when its top bit is clear. After
;                  cvtsi2ss the doubled value is computed unconditionally
;                  (movaps + addss); js keeps it when the top bit was set,
;                  otherwise the fall-through movaps restores the undoubled
;                  result. pushq %rax is expected only right before the
;                  __truncsfhf2 libcall.
;   +f16c        - the same halve/select prologue, but jns branches around
;                  vaddss so the doubling only executes on the top-bit-set
;                  path; then upper lanes blended with zero and vcvtps2ph $4.
;   +avx512f     - the unsigned convert vcvtusi2ss from %rdi, then upper lanes
;                  blended with zero and vcvtps2ph $4.
;   +avx512fp16  - x86_64 uses a single vcvtusi2sh from %rdi. The i686 run
;                  loads the value into an xmm register with vmovsd and
;                  converts it with the packed vcvtuqq2ph.
; In both branching sequences the conditional jump consumes the flags produced
; by testq; the intervening SSE/AVX instructions do not write EFLAGS.
define half @uitofp_i64tof16(i64 %x) #0 {
; SSE2-LABEL: uitofp_i64tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %rax
; SSE2-NEXT:    shrq %rax
; SSE2-NEXT:    movl %edi, %ecx
; SSE2-NEXT:    andl $1, %ecx
; SSE2-NEXT:    orq %rax, %rcx
; SSE2-NEXT:    testq %rdi, %rdi
; SSE2-NEXT:    cmovnsq %rdi, %rcx
; SSE2-NEXT:    cvtsi2ss %rcx, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    addss %xmm1, %xmm0
; SSE2-NEXT:    js .LBB9_2
; SSE2-NEXT:  # %bb.1:
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:  .LBB9_2:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; F16C-LABEL: uitofp_i64tof16:
; F16C:       # %bb.0:
; F16C-NEXT:    movq %rdi, %rax
; F16C-NEXT:    shrq %rax
; F16C-NEXT:    movl %edi, %ecx
; F16C-NEXT:    andl $1, %ecx
; F16C-NEXT:    orq %rax, %rcx
; F16C-NEXT:    testq %rdi, %rdi
; F16C-NEXT:    cmovnsq %rdi, %rcx
; F16C-NEXT:    vcvtsi2ss %rcx, %xmm0, %xmm0
; F16C-NEXT:    jns .LBB9_2
; F16C-NEXT:  # %bb.1:
; F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; F16C-NEXT:  .LBB9_2:
; F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vmovd %xmm0, %eax
; F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; F16C-NEXT:    retq
;
; AVX512-LABEL: uitofp_i64tof16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; X86-LABEL: uitofp_i64tof16:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcvtuqq2ph %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i64tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret half %result
}

; Attribute group #0 is attached to the test functions and to the constrained
; intrinsic call sites above: strictfp marks them as using strict
; floating-point semantics, and nounwind states that they do not unwind.
attributes #0 = { strictfp nounwind }