llvm/llvm/test/CodeGen/X86/select-phi-s16-fp.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mcpu=generic -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s

; For all these tests we disable optimizations through function attributes
; because the code we are exercising here needs phis and we want to keep the
; IR small.

; This code used to crash in SDISel because bf16 was promoted to f32 through
; a `f32 = vector_extract_elt <1 x bf16>, i32 0`, which is illegal.
; The invalid SDNode, and thus the crash, was only exposed by constant
; folding.
define void @phi_vec1bf16_to_f32_with_const_folding(ptr %dst) #0 {
; CHECK-LABEL: phi_vec1bf16_to_f32_with_const_folding:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    jmp LBB0_1
; CHECK-NEXT:  LBB0_1: ## %bb
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    callq ___truncsfbf2
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, 2(%rbx)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
entry:
  ; The only purpose of this block is to give %bb a predecessor so that it
  ; can hold a phi.
  br label %bb

bb:
  ; Single-incoming phi whose value is a constant zero <1 x bfloat>; the
  ; constant operand is what makes this the "with const folding" variant.
  %phi = phi <1 x bfloat> [ zeroinitializer, %entry ]
  ; Widen to <2 x bfloat>: mask index 0 selects the poison operand and mask
  ; index 1 selects element 0 of %phi, so only result lane 1 is defined.
  %res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
  ; Lane 0 is poison, which matches the expected assembly above containing a
  ; single 16-bit store at offset 2 from %dst.
  store <2 x bfloat> %res, ptr %dst
  ret void
}
; Same as phi_vec1bf16_to_f32_with_const_folding but without the constant
; folding.
; This test exercises the same invalid SDNode, but it happened to work by
; accident before. Here we make sure the fix also works as expected in the
; non-constant folding case.
define void @phi_vec1bf16_to_f32(ptr %src, ptr %dst) #0 {
; CHECK-LABEL: phi_vec1bf16_to_f32:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    movzwl (%rdi), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    jmp LBB1_1
; CHECK-NEXT:  LBB1_1: ## %bb
; CHECK-NEXT:    callq ___truncsfbf2
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, 2(%rbx)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
entry:
  ; Load the phi input from memory so that it is not a compile-time constant.
  ; In the expected assembly above this is a 16-bit load shifted left by 16
  ; and moved into %xmm0 before the jump to %bb.
  %input = load <1 x bfloat>, ptr %src
  br label %bb

bb:
  ; Single-incoming phi carrying the loaded <1 x bfloat> across the block
  ; boundary.
  %phi = phi <1 x bfloat> [ %input, %entry ]
  ; Widen to <2 x bfloat>: mask index 0 selects the poison operand and mask
  ; index 1 selects element 0 of %phi, so only result lane 1 is defined.
  %res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
  ; Lane 0 is poison, which matches the expected assembly above containing a
  ; single 16-bit store at offset 2 from %dst.
  store <2 x bfloat> %res, ptr %dst
  ret void
}


; Half type is legal on x86 so nothing special here, but it
; doesn't hurt to be thorough.
define void @phi_vec1half_with_const_folding(ptr %dst) #0 {
; CHECK-LABEL: phi_vec1half_with_const_folding:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    jmp LBB2_1
; CHECK-NEXT:  LBB2_1: ## %bb
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, 2(%rdi)
; CHECK-NEXT:    retq
entry:
  ; The only purpose of this block is to give %bb a predecessor so that it
  ; can hold a phi.
  br label %bb

bb:
  ; Single-incoming phi whose value is a constant zero <1 x half>. Unlike the
  ; bfloat variants, the expected assembly above contains no library call.
  %phi = phi <1 x half> [ zeroinitializer, %entry ]
  ; Widen to <2 x half>: mask index 0 selects the poison operand and mask
  ; index 1 selects element 0 of %phi, so only result lane 1 is defined.
  %res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
  ; Lane 0 is poison, which matches the expected assembly above containing a
  ; single 16-bit store at offset 2 from %dst.
  store <2 x half> %res, ptr %dst
  ret void
}

; Half type is legal on x86 so nothing special here, but it
; doesn't hurt to be thorough.
; Same as phi_vec1half_with_const_folding but without the constant folding.
define void @phi_vec1half(ptr %src, ptr %dst) #0 {
; CHECK-LABEL: phi_vec1half:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pinsrw $0, (%rdi), %xmm0
; CHECK-NEXT:    jmp LBB3_1
; CHECK-NEXT:  LBB3_1: ## %bb
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movw %ax, 2(%rsi)
; CHECK-NEXT:    retq
entry:
  ; Load the phi input from memory so that it is not a compile-time constant.
  ; In the expected assembly above the 16-bit value is inserted directly into
  ; %xmm0 from (%src).
  %input = load <1 x half>, ptr %src
  br label %bb

bb:
  ; Single-incoming phi carrying the loaded <1 x half> across the block
  ; boundary.
  %phi = phi <1 x half> [ %input, %entry ]
  ; Widen to <2 x half>: mask index 0 selects the poison operand and mask
  ; index 1 selects element 0 of %phi, so only result lane 1 is defined.
  %res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
  ; Lane 0 is poison, which matches the expected assembly above containing a
  ; single 16-bit store at offset 2 from %dst.
  store <2 x half> %res, ptr %dst
  ret void
}

; Attribute group shared by every function in this file. optnone disables
; optimizations so that the single-incoming phis survive to instruction
; selection; noinline is specified together with it, as optnone requires.
attributes #0 = { noinline optnone }