; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK-NO_FP16
; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512fp16 | FileCheck %s --check-prefixes=CHECK-WITH_FP16
; Note: We could check more configurations, but anything with software
; emulation of fp16 generates a ton of assembly code and is not particularly
; interesting.
;----------------------------------------
; i8 input
;----------------------------------------
; Scalar uint8_t -> float.
; Expected lowering (identical with and without half support):
; - zext the i8 argument to i32 (movzbl)
; - convert the i32 to float (vcvtsi2ss)
define float @uint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movzbl %dil, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: uint8ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: retq
  %result = uitofp i8 %int8 to float
  ret float %result
}
; Vector uint8_t -> float (16 lanes).
; Vector counterpart of @uint8ToFloat: all lanes are zero-extended to i32
; (vpmovzxbd) and then converted to float (vcvtdq2ps). The expected output is
; the same with and without half support.
define <16 x float> @vector_uint8ToFloat(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_uint8ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: vector_uint8ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT: retq
  %result = uitofp <16 x i8> %int8 to <16 x float>
  ret <16 x float> %result
}
; uint8_t to half.
;
; If no half support:
; - Go from i8 to i32: zext
; - Convert i32 to float
; - Trunc from float to half (vcvtps2ph); the trailing vmovd/vpinsrw pair
;   moves the 16-bit result through %eax into the low word of %xmm0
;
; Else if half support:
; - Go from i8 to i32: zext
; - Convert i32 to half (vcvtsi2sh)
define half @uint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movzbl %dil, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: uint8ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: retq
  ; The result is a half, so the value is named %fp16 (it was misleadingly
  ; called %fp32 before).
  %fp16 = uitofp i8 %int8 to half
  ret half %fp16
}
; vector uint8_t to half.
;
; If no half support:
; - Go from i8 to i32: zext
; - Convert i32 to float
; - Trunc from float to half
;
; Else if half support:
; - Go from i8 to i16: zext
; - Convert i16 to half (the signed word conversion vcvtw2ph is expected
;   after the zero extension)
;
; The difference from the scalar version (uint8ToHalf) is that i16 is used
; as the intermediate type when we have half support.
define <16 x half> @vector_uint8ToHalf(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_uint8ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: vector_uint8ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-WITH_FP16-NEXT: vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT: retq
  ; The result is a vector of half, so the value is named %fp16 (it was
  ; misleadingly called %fp32 before).
  %fp16 = uitofp <16 x i8> %int8 to <16 x half>
  ret <16 x half> %fp16
}
; Same as uint8_t but with the signed variant.
; I.e., use sext instead of zext.
; Scalar int8_t -> float.
; Expected lowering (identical with and without half support):
; - sext the i8 argument to i32 (movsbl)
; - convert the i32 to float (vcvtsi2ss)
define float @sint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movsbl %dil, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: sint8ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: retq
  %result = sitofp i8 %int8 to float
  ret float %result
}
; Vector int8_t -> float (16 lanes).
; All lanes are sign-extended to i32 (vpmovsxbd) and then converted to float
; (vcvtdq2ps). The expected output is the same with and without half support.
define <16 x float> @vector_sint8ToFloat(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_sint8ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: vpmovsxbd %xmm0, %zmm0
; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: vector_sint8ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: vpmovsxbd %xmm0, %zmm0
; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT: retq
  %result = sitofp <16 x i8> %int8 to <16 x float>
  ret <16 x float> %result
}
; int8_t to half.
;
; If no half support:
; - Go from i8 to i32: sext
; - Convert i32 to float
; - Trunc from float to half (vcvtps2ph); the trailing vmovd/vpinsrw pair
;   moves the 16-bit result through %eax into the low word of %xmm0
;
; Else if half support:
; - Go from i8 to i32: sext
; - Convert i32 to half (vcvtsi2sh)
define half @sint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movsbl %dil, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: sint8ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: retq
  ; The result is a half, so the value is named %fp16 (it was misleadingly
  ; called %fp32 before).
  %fp16 = sitofp i8 %int8 to half
  ret half %fp16
}
; vector int8_t to half.
;
; If no half support:
; - Go from i8 to i32: sext
; - Convert i32 to float
; - Trunc from float to half
;
; Else if half support:
; - Go from i8 to i16: sext
; - Convert i16 to half (vcvtw2ph)
define <16 x half> @vector_sint8ToHalf(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_sint8ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: vpmovsxbd %xmm0, %zmm0
; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: vector_sint8ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-WITH_FP16-NEXT: vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT: retq
  ; The result is a vector of half, so the value is named %fp16 (it was
  ; misleadingly called %fp32 before).
  %fp16 = sitofp <16 x i8> %int8 to <16 x half>
  ret <16 x half> %fp16
}
;----------------------------------------
; i16 input
;----------------------------------------
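; The lowering is similar to the i8 case, but with i16 as the input type.
; One difference: with half support, the vector i16-to-half conversions
; need no intermediate extension.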
; Scalar uint16_t -> float.
; Expected lowering (identical with and without half support):
; - zext the i16 argument to i32 (movzwl)
; - convert the i32 to float (vcvtsi2ss)
define float @uint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movzwl %di, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: uint16ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movzwl %di, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: retq
  %result = uitofp i16 %int16 to float
  ret float %result
}
; Vector uint16_t -> float (16 lanes).
; All lanes are zero-extended to i32 (vpmovzxwd) and then converted to float
; (vcvtdq2ps). The expected output is the same with and without half support.
define <16 x float> @vector_uint16ToFloat(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_uint16ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: vector_uint16ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT: retq
  %result = uitofp <16 x i16> %int16 to <16 x float>
  ret <16 x float> %result
}
; uint16_t to half.
;
; If no half support:
; - Go from i16 to i32: zext
; - Convert i32 to float
; - Trunc from float to half (vcvtps2ph); the trailing vmovd/vpinsrw pair
;   moves the 16-bit result through %eax into the low word of %xmm0
;
; Else if half support:
; - Go from i16 to i32: zext
; - Convert i32 to half (vcvtsi2sh)
define half @uint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movzwl %di, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: uint16ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movzwl %di, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: retq
  ; The result is a half, so the value is named %fp16 (it was misleadingly
  ; called %fp32 before).
  %fp16 = uitofp i16 %int16 to half
  ret half %fp16
}
; vector uint16_t to half.
;
; If no half support:
; - Go from i16 to i32: zext
; - Convert i32 to float
; - Trunc from float to half
;
; Else if half support:
; - Convert i16 to half directly with the unsigned word conversion
;   (vcvtuw2ph); no extension is needed
define <16 x half> @vector_uint16ToHalf(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_uint16ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: vector_uint16ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: vcvtuw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT: retq
  ; The result is a vector of half, so the value is named %fp16 (it was
  ; misleadingly called %fp32 before).
  %fp16 = uitofp <16 x i16> %int16 to <16 x half>
  ret <16 x half> %fp16
}
; Scalar int16_t -> float.
; Expected lowering (identical with and without half support):
; - sext the i16 argument to i32 (movswl)
; - convert the i32 to float (vcvtsi2ss)
define float @sint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movswl %di, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: sint16ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movswl %di, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: retq
  %result = sitofp i16 %int16 to float
  ret float %result
}
; Vector int16_t -> float (16 lanes).
; All lanes are sign-extended to i32 (vpmovsxwd) and then converted to float
; (vcvtdq2ps). The expected output is the same with and without half support.
define <16 x float> @vector_sint16ToFloat(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_sint16ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: vpmovsxwd %ymm0, %zmm0
; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: vector_sint16ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: vpmovsxwd %ymm0, %zmm0
; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT: retq
  %result = sitofp <16 x i16> %int16 to <16 x float>
  ret <16 x float> %result
}
; int16_t to half.
;
; If no half support:
; - Go from i16 to i32: sext
; - Convert i32 to float
; - Trunc from float to half (vcvtps2ph); the trailing vmovd/vpinsrw pair
;   moves the 16-bit result through %eax into the low word of %xmm0
;
; Else if half support:
; - Go from i16 to i32: sext
; - Convert i32 to half (vcvtsi2sh)
define half @sint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movswl %di, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: sint16ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movswl %di, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: retq
  ; The result is a half, so the value is named %fp16 (it was misleadingly
  ; called %fp32 before).
  %fp16 = sitofp i16 %int16 to half
  ret half %fp16
}
; vector int16_t to half.
;
; If no half support:
; - Go from i16 to i32: sext
; - Convert i32 to float
; - Trunc from float to half
;
; Else if half support:
; - Convert i16 to half directly with the signed word conversion
;   (vcvtw2ph); no extension is needed
define <16 x half> @vector_sint16ToHalf(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_sint16ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: vpmovsxwd %ymm0, %zmm0
; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: vector_sint16ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT: retq
  ; The result is a vector of half, so the value is named %fp16 (it was
  ; misleadingly called %fp32 before).
  %fp16 = sitofp <16 x i16> %int16 to <16 x half>
  ret <16 x half> %fp16
}