; llvm/llvm/test/CodeGen/RISCV/bfloat-arith.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin -verify-machineinstrs \
; RUN:   -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK,RV32IZFBFMIN %s
; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin -verify-machineinstrs \
; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK,RV64IZFBFMIN %s

; These tests descend from float-arith.ll, where each function was targeted at
; a particular RISC-V FPU instruction.

; fadd on bfloat. The expected assembly widens both operands with
; fcvt.s.bf16, adds them with fadd.s, and narrows the sum with fcvt.bf16.s.
define bfloat @fadd_s(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: fadd_s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    fadd.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = fadd bfloat %a, %b
  ret bfloat %1
}

; fsub on bfloat. The expected assembly widens both operands with
; fcvt.s.bf16, subtracts with fsub.s, and narrows the result with fcvt.bf16.s.
define bfloat @fsub_s(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: fsub_s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    fsub.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = fsub bfloat %a, %b
  ret bfloat %1
}

; fmul on bfloat. The expected assembly widens both operands with
; fcvt.s.bf16, multiplies with fmul.s, and narrows the product with
; fcvt.bf16.s.
define bfloat @fmul_s(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: fmul_s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    fmul.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = fmul bfloat %a, %b
  ret bfloat %1
}

; fdiv on bfloat. The expected assembly widens both operands with
; fcvt.s.bf16, divides with fdiv.s, and narrows the quotient with
; fcvt.bf16.s.
define bfloat @fdiv_s(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: fdiv_s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    fdiv.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = fdiv bfloat %a, %b
  ret bfloat %1
}

declare bfloat @llvm.sqrt.bf16(bfloat)

; llvm.sqrt on bfloat. The expected assembly widens the operand with
; fcvt.s.bf16, applies fsqrt.s, and narrows the result with fcvt.bf16.s.
define bfloat @fsqrt_s(bfloat %a) nounwind {
; CHECK-LABEL: fsqrt_s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
; CHECK-NEXT:    fsqrt.s fa5, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = call bfloat @llvm.sqrt.bf16(bfloat %a)
  ret bfloat %1
}

declare bfloat @llvm.copysign.bf16(bfloat, bfloat)

; llvm.copysign on bfloat. The expected assembly contains no sign-injection
; instruction. Both operands are stored to the stack with fsh, bit 7 (mask
; 128) of the byte at offset +1 of fa1's slot is merged with the low seven
; bits (mask 127) of the matching byte of fa0's slot, and the merged value is
; reloaded with flh.
define bfloat @fsgnj_s(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: fsgnj_s:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fsh fa1, 12(sp)
; RV32IZFBFMIN-NEXT:    fsh fa0, 8(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    lbu a1, 9(sp)
; RV32IZFBFMIN-NEXT:    andi a0, a0, 128
; RV32IZFBFMIN-NEXT:    andi a1, a1, 127
; RV32IZFBFMIN-NEXT:    or a0, a1, a0
; RV32IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV32IZFBFMIN-NEXT:    flh fa0, 8(sp)
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fsgnj_s:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fsh fa1, 8(sp)
; RV64IZFBFMIN-NEXT:    fsh fa0, 0(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    lbu a1, 1(sp)
; RV64IZFBFMIN-NEXT:    andi a0, a0, 128
; RV64IZFBFMIN-NEXT:    andi a1, a1, 127
; RV64IZFBFMIN-NEXT:    or a0, a1, a0
; RV64IZFBFMIN-NEXT:    sb a0, 1(sp)
; RV64IZFBFMIN-NEXT:    flh fa0, 0(sp)
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %b)
  ret bfloat %1
}

; Computes %a + %a, negates the sum, compares it (oeq) with the unnegated sum
; and returns the comparison zero-extended to i32; %b is unused. In the
; expected assembly the fneg appears as an xori with 128 on one byte of the
; spilled sum, and the comparison as feq.s on the two widened values.
define i32 @fneg_s(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: fneg_s:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
; RV32IZFBFMIN-NEXT:    flh fa4, 12(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
; RV32IZFBFMIN-NEXT:    feq.s a0, fa5, fa4
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fneg_s:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa4, 8(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
; RV64IZFBFMIN-NEXT:    feq.s a0, fa5, fa4
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = fadd bfloat %a, %a
  %2 = fneg bfloat %1
  %3 = fcmp oeq bfloat %1, %2
  %4 = zext i1 %3 to i32
  ret i32 %4
}

; copysign(%a, -(%a + %b)). In the expected assembly the fneg is an xori with
; 128 on a byte of the spilled sum, and the copysign is the andi/or byte merge
; through the stack. The frame is 16 bytes on RV32 and 32 bytes on RV64.
define bfloat @fsgnjn_s(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: fsgnjn_s:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 4(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 5(sp)
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 5(sp)
; RV32IZFBFMIN-NEXT:    flh fa5, 4(sp)
; RV32IZFBFMIN-NEXT:    fsh fa0, 8(sp)
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV32IZFBFMIN-NEXT:    lbu a1, 13(sp)
; RV32IZFBFMIN-NEXT:    andi a0, a0, 127
; RV32IZFBFMIN-NEXT:    andi a1, a1, 128
; RV32IZFBFMIN-NEXT:    or a0, a0, a1
; RV32IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV32IZFBFMIN-NEXT:    flh fa0, 8(sp)
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fsgnjn_s:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -32
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    fsh fa0, 16(sp)
; RV64IZFBFMIN-NEXT:    fsh fa5, 24(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 17(sp)
; RV64IZFBFMIN-NEXT:    lbu a1, 25(sp)
; RV64IZFBFMIN-NEXT:    andi a0, a0, 127
; RV64IZFBFMIN-NEXT:    andi a1, a1, 128
; RV64IZFBFMIN-NEXT:    or a0, a0, a1
; RV64IZFBFMIN-NEXT:    sb a0, 17(sp)
; RV64IZFBFMIN-NEXT:    flh fa0, 16(sp)
; RV64IZFBFMIN-NEXT:    addi sp, sp, 32
; RV64IZFBFMIN-NEXT:    ret
  %1 = fadd bfloat %a, %b
  %2 = fneg bfloat %1
  %3 = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %2)
  ret bfloat %3
}

declare bfloat @llvm.fabs.bf16(bfloat)

; fabs(%a + %b) + (%a + %b). In the expected assembly the fabs is an andi
; with 127 on one byte of the spilled sum, followed by a second fadd.s on the
; widened values.
define bfloat @fabs_s(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: fabs_s:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    andi a0, a0, 127
; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
; RV32IZFBFMIN-NEXT:    flh fa4, 12(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fabs_s:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    andi a0, a0, 127
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa4, 8(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = fadd bfloat %a, %b
  %2 = call bfloat @llvm.fabs.bf16(bfloat %1)
  %3 = fadd bfloat %2, %1
  ret bfloat %3
}

declare bfloat @llvm.minnum.bf16(bfloat, bfloat)

; llvm.minnum on bfloat. The expected assembly widens both operands with
; fcvt.s.bf16, applies fmin.s, and narrows the result with fcvt.bf16.s.
define bfloat @fmin_s(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: fmin_s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    fmin.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = call bfloat @llvm.minnum.bf16(bfloat %a, bfloat %b)
  ret bfloat %1
}

declare bfloat @llvm.maxnum.bf16(bfloat, bfloat)

; llvm.maxnum on bfloat. The expected assembly widens both operands with
; fcvt.s.bf16, applies fmax.s, and narrows the result with fcvt.bf16.s.
define bfloat @fmax_s(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: fmax_s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    fmax.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = call bfloat @llvm.maxnum.bf16(bfloat %a, bfloat %b)
  ret bfloat %1
}

declare bfloat @llvm.fma.bf16(bfloat, bfloat, bfloat)

; llvm.fma on bfloat. The expected assembly widens all three operands with
; fcvt.s.bf16, applies a single fmadd.s, and narrows the result with
; fcvt.bf16.s.
define bfloat @fmadd_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
; CHECK-LABEL: fmadd_s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa2
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
; CHECK-NEXT:    fmadd.s fa5, fa3, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
  ret bfloat %1
}

; fma(%a, %b, -(0.0 + %c)). In the expected assembly the negation still
; appears as an xori with 128 on one byte of the spilled value, and the fused
; operation is a plain fmadd.s on the widened operands.
define bfloat @fmsub_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
; RV32IZFBFMIN-LABEL: fmsub_s:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
; RV32IZFBFMIN-NEXT:    flh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fmsub_s:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %c_ = fadd bfloat 0.0, %c ; avoid negation using xor
  %negc = fsub bfloat -0.0, %c_
  %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %negc)
  ret bfloat %1
}

; fma(-(0.0 + %a), %b, -(0.0 + %c)). In the expected assembly each negation
; is an xori with 128 on one byte of a spilled value, and the fused operation
; is a plain fmadd.s on the widened operands.
define bfloat @fnmadd_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
; RV32IZFBFMIN-LABEL: fnmadd_s:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV32IZFBFMIN-NEXT:    flh fa4, 8(sp)
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
; RV32IZFBFMIN-NEXT:    flh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa4, fa3, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fnmadd_s:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 0(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 1(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 1(sp)
; RV64IZFBFMIN-NEXT:    flh fa4, 0(sp)
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa4, fa3, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %a_ = fadd bfloat 0.0, %a
  %c_ = fadd bfloat 0.0, %c
  %nega = fsub bfloat -0.0, %a_
  %negc = fsub bfloat -0.0, %c_
  %1 = call bfloat @llvm.fma.bf16(bfloat %nega, bfloat %b, bfloat %negc)
  ret bfloat %1
}

; fma(%a, -(0.0 + %b), -(0.0 + %c)), i.e. the variant of fnmadd_s that
; negates the second multiplicand instead of the first. In the expected
; assembly each negation is an xori with 128 on one byte of a spilled value,
; followed by a plain fmadd.s.
define bfloat @fnmadd_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind {
; RV32IZFBFMIN-LABEL: fnmadd_s_2:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV32IZFBFMIN-NEXT:    flh fa4, 8(sp)
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
; RV32IZFBFMIN-NEXT:    flh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fnmadd_s_2:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 0(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 1(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 1(sp)
; RV64IZFBFMIN-NEXT:    flh fa4, 0(sp)
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %b_ = fadd bfloat 0.0, %b
  %c_ = fadd bfloat 0.0, %c
  %negb = fsub bfloat -0.0, %b_
  %negc = fsub bfloat -0.0, %c_
  %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %negb, bfloat %negc)
  ret bfloat %1
}

; -(fma(%a, %b, %c)). The expected assembly performs fmadd.s on the widened
; operands, narrows the result, and then negates it with an xori with 128 on
; one byte of the spilled value.
define bfloat @fnmadd_s_3(bfloat %a, bfloat %b, bfloat %c) nounwind {
; RV32IZFBFMIN-LABEL: fnmadd_s_3:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
; RV32IZFBFMIN-NEXT:    flh fa0, 12(sp)
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fnmadd_s_3:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa0, 8(sp)
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
  %neg = fneg bfloat %1
  ret bfloat %neg
}


; Same computation as fnmadd_s_3, -(fma(%a, %b, %c)), but with the nsz
; fast-math flag on both the fma call and the fneg. The expected assembly has
; the same instruction sequence as fnmadd_s_3.
define bfloat @fnmadd_nsz(bfloat %a, bfloat %b, bfloat %c) nounwind {
; RV32IZFBFMIN-LABEL: fnmadd_nsz:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
; RV32IZFBFMIN-NEXT:    flh fa0, 12(sp)
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fnmadd_nsz:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa1
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa0, 8(sp)
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = call nsz bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
  %neg = fneg nsz bfloat %1
  ret bfloat %neg
}

; fma(-(0.0 + %a), %b, %c). In the expected assembly the negation is an xori
; with 128 on one byte of the spilled value, and the fused operation is a
; plain fmadd.s on the widened operands.
define bfloat @fnmsub_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
; RV32IZFBFMIN-LABEL: fnmsub_s:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
; RV32IZFBFMIN-NEXT:    flh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa2
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa5, fa3, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fnmsub_s:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa2
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa5, fa3, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %a_ = fadd bfloat 0.0, %a
  %nega = fsub bfloat -0.0, %a_
  %1 = call bfloat @llvm.fma.bf16(bfloat %nega, bfloat %b, bfloat %c)
  ret bfloat %1
}

; fma(%a, -(0.0 + %b), %c), i.e. the variant of fnmsub_s that negates the
; second multiplicand instead of the first. In the expected assembly the
; negation is an xori with 128 on one byte of the spilled value, followed by
; a plain fmadd.s.
define bfloat @fnmsub_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind {
; RV32IZFBFMIN-LABEL: fnmsub_s_2:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
; RV32IZFBFMIN-NEXT:    flh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa2
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fnmsub_s_2:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa2
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa0
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fmadd.s fa5, fa3, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %b_ = fadd bfloat 0.0, %b
  %negb = fsub bfloat -0.0, %b_
  %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %negb, bfloat %c)
  ret bfloat %1
}

; (%a * %b) + %c with the contract flag on both the fmul and the fadd. The
; expected assembly keeps a separate fmul.s and fadd.s, with the product
; narrowed to bfloat and widened again in between; no fused instruction is
; expected.
define bfloat @fmadd_s_contract(bfloat %a, bfloat %b, bfloat %c) nounwind {
; CHECK-LABEL: fmadd_s_contract:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    fmul.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa2
; CHECK-NEXT:    fadd.s fa5, fa5, fa4
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = fmul contract bfloat %a, %b
  %2 = fadd contract bfloat %1, %c
  ret bfloat %2
}

; (%a * %b) - (0.0 + %c) with the contract flag on the fmul and the fsub. The
; expected assembly keeps a separate fmul.s and fsub.s, each followed by a
; narrowing conversion; no fused instruction is expected.
define bfloat @fmsub_s_contract(bfloat %a, bfloat %b, bfloat %c) nounwind {
; CHECK-LABEL: fmsub_s_contract:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa2
; CHECK-NEXT:    fmv.w.x fa4, zero
; CHECK-NEXT:    fadd.s fa5, fa5, fa4
; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
; CHECK-NEXT:    fmul.s fa4, fa3, fa4
; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
; CHECK-NEXT:    fsub.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %c_ = fadd bfloat 0.0, %c ; avoid negation using xor
  %1 = fmul contract bfloat %a, %b
  %2 = fsub contract bfloat %1, %c_
  ret bfloat %2
}

; -((0.0 + %a) * (0.0 + %b)) - (0.0 + %c) with the contract flag on the fmul
; and the fsub. The expected assembly keeps a separate fmul.s and fsub.s; the
; fneg of the product is an xori with 128 on one byte of the spilled value.
define bfloat @fnmadd_s_contract(bfloat %a, bfloat %b, bfloat %c) nounwind {
; RV32IZFBFMIN-LABEL: fnmadd_s_contract:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
; RV32IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
; RV32IZFBFMIN-NEXT:    fadd.s fa3, fa3, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa3, fa3
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa3
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fmul.s fa5, fa5, fa3
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV32IZFBFMIN-NEXT:    xori a0, a0, 128
; RV32IZFBFMIN-NEXT:    sb a0, 13(sp)
; RV32IZFBFMIN-NEXT:    flh fa3, 12(sp)
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa3
; RV32IZFBFMIN-NEXT:    fsub.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fnmadd_s_contract:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
; RV64IZFBFMIN-NEXT:    fmv.w.x fa4, zero
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa1
; RV64IZFBFMIN-NEXT:    fadd.s fa3, fa3, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa3, fa3
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa3, fa3
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fmul.s fa5, fa5, fa3
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa2
; RV64IZFBFMIN-NEXT:    xori a0, a0, 128
; RV64IZFBFMIN-NEXT:    sb a0, 9(sp)
; RV64IZFBFMIN-NEXT:    flh fa3, 8(sp)
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa3
; RV64IZFBFMIN-NEXT:    fsub.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %a_ = fadd bfloat 0.0, %a ; avoid negation using xor
  %b_ = fadd bfloat 0.0, %b ; avoid negation using xor
  %c_ = fadd bfloat 0.0, %c ; avoid negation using xor
  %1 = fmul contract bfloat %a_, %b_
  %2 = fneg bfloat %1
  %3 = fsub contract bfloat %2, %c_
  ret bfloat %3
}

; %c - ((0.0 + %a) * (0.0 + %b)) with the contract flag on the fmul and the
; fsub. The expected assembly keeps a separate fmul.s and fsub.s, with the
; product narrowed and widened again in between, and uses no stack frame.
define bfloat @fnmsub_s_contract(bfloat %a, bfloat %b, bfloat %c) nounwind {
; CHECK-LABEL: fnmsub_s_contract:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
; CHECK-NEXT:    fmv.w.x fa4, zero
; CHECK-NEXT:    fadd.s fa5, fa5, fa4
; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
; CHECK-NEXT:    fcvt.s.bf16 fa3, fa1
; CHECK-NEXT:    fadd.s fa4, fa3, fa4
; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
; CHECK-NEXT:    fmul.s fa5, fa5, fa4
; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa2
; CHECK-NEXT:    fsub.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %a_ = fadd bfloat 0.0, %a ; avoid negation using xor
  %b_ = fadd bfloat 0.0, %b ; avoid negation using xor
  %1 = fmul contract bfloat %a_, %b_
  %2 = fsub contract bfloat %c, %1
  ret bfloat %2
}