; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I-ILP32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I-LP64
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32D
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64D
; Lowering of fptrunc float -> bfloat: all configurations use the
; __truncsfbf2 libcall; FPR-ABI variants additionally move the result
; between FPRs/GPRs, and the `lui 1048560` + `or` pair sets the upper 16
; bits (0xFFFF0000) of the result — presumably NaN-boxing the bf16 value
; for the FPR representation; confirm against RISCVISelLowering.
define bfloat @float_to_bfloat(float %a) nounwind {
; RV32I-ILP32-LABEL: float_to_bfloat:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: call __truncsfbf2
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: float_to_bfloat:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: call __truncsfbf2
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: float_to_bfloat:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: call __truncsfbf2
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: float_to_bfloat:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: call __truncsfbf2
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: float_to_bfloat:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: call __truncsfbf2
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: float_to_bfloat:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: call __truncsfbf2
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
  %1 = fptrunc float %a to bfloat
  ret bfloat %1
}
; Lowering of fptrunc double -> bfloat: a single __truncdfbf2 libcall in
; every configuration (no intermediate trunc to float), with the same
; upper-bit OR / FPR moves as float_to_bfloat in the FPR-ABI variants.
define bfloat @double_to_bfloat(double %a) nounwind {
; RV32I-ILP32-LABEL: double_to_bfloat:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: call __truncdfbf2
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: double_to_bfloat:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: call __truncdfbf2
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: double_to_bfloat:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: call __truncdfbf2
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: double_to_bfloat:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: call __truncdfbf2
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: double_to_bfloat:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: call __truncdfbf2
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: double_to_bfloat:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: call __truncdfbf2
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
  %1 = fptrunc double %a to bfloat
  ret bfloat %1
}
; Lowering of fpext bfloat -> float: no libcall needed — a bf16 value is
; the top 16 bits of an f32, so the extension is a single 16-bit left
; shift (plus FPR<->GPR moves for the FPR-argument ABIs).
define float @bfloat_to_float(bfloat %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_to_float:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: slli a0, a0, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_to_float:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: slliw a0, a0, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_to_float:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_to_float:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_float:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_to_float:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ret
  %1 = fpext bfloat %a to float
  ret float %1
}
; Lowering of fpext bfloat -> double: shift-extend bf16 to f32, then
; widen f32 -> f64. Soft-float configs call __extendsfdf2; +d configs use
; fcvt.d.s. The RV32ID-ILP32 variant round-trips the f64 result through
; the stack (fsd + two lw) to return it in the a0/a1 GPR pair.
define double @bfloat_to_double(bfloat %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_to_double:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: slli a0, a0, 16
; RV32I-ILP32-NEXT: call __extendsfdf2
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_to_double:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: slliw a0, a0, 16
; RV64I-LP64-NEXT: call __extendsfdf2
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_to_double:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0
; RV32ID-ILP32-NEXT: fcvt.d.s fa5, fa5
; RV32ID-ILP32-NEXT: fsd fa5, 8(sp)
; RV32ID-ILP32-NEXT: lw a0, 8(sp)
; RV32ID-ILP32-NEXT: lw a1, 12(sp)
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_to_double:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a0
; RV64ID-LP64-NEXT: fcvt.d.s fa5, fa5
; RV64ID-LP64-NEXT: fmv.x.d a0, fa5
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_double:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a0
; RV32ID-ILP32D-NEXT: fcvt.d.s fa0, fa5
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_to_double:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a0
; RV64ID-LP64D-NEXT: fcvt.d.s fa0, fa5
; RV64ID-LP64D-NEXT: ret
  %1 = fpext bfloat %a to double
  ret double %1
}
; Bitcast i16 -> bfloat: a no-op on the soft-float ABIs (same GPR bits);
; FPR-ABI variants OR in the upper 16 bits (lui 1048560 = 0xFFFF0000)
; before the value is treated as an FPR bf16 — same upper-bit pattern as
; the __truncsfbf2 results above.
define bfloat @i16_to_bfloat(i16 %a) nounwind {
; RV32I-ILP32-LABEL: i16_to_bfloat:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: i16_to_bfloat:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: i16_to_bfloat:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: i16_to_bfloat:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: i16_to_bfloat:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: i16_to_bfloat:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ret
  %1 = bitcast i16 %a to bfloat
  ret bfloat %1
}
; Bitcast bfloat -> i16: a pure no-op when the argument arrives in a GPR;
; the ilp32d/lp64d ABIs only need an fmv.x.w to move the bits out of fa0.
define i16 @bfloat_to_i16(bfloat %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_to_i16:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_to_i16:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_to_i16:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_to_i16:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_i16:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_to_i16:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: ret
  %1 = bitcast bfloat %a to i16
  ret i16 %1
}
; bfloat fadd is promoted: both operands are extended to f32 (16-bit
; shift), added in f32 (__addsf3 libcall on soft-float, fadd.s with +d),
; then truncated back with __truncsfbf2. FPR-ABI variants also shuttle
; operands/result between fa* and a* registers around the libcall.
define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
; RV32I-ILP32-LABEL: bfloat_add:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: slli a0, a0, 16
; RV32I-ILP32-NEXT: slli a1, a1, 16
; RV32I-ILP32-NEXT: call __addsf3
; RV32I-ILP32-NEXT: call __truncsfbf2
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_add:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: slliw a0, a0, 16
; RV64I-LP64-NEXT: slliw a1, a1, 16
; RV64I-LP64-NEXT: call __addsf3
; RV64I-LP64-NEXT: call __truncsfbf2
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_add:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: slli a1, a1, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT: call __truncsfbf2
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_add:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: slli a1, a1, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT: fmv.x.w a0, fa5
; RV64ID-LP64-NEXT: call __truncsfbf2
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_add:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: fmv.x.w a1, fa1
; RV32ID-ILP32D-NEXT: slli a1, a1, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT: call __truncsfbf2
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_add:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: fmv.x.w a1, fa1
; RV64ID-LP64D-NEXT: slli a1, a1, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT: call __truncsfbf2
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
  %1 = fadd bfloat %a, %b
  ret bfloat %1
}
; Loading bfloat from memory: 16-bit loads (lh/lhu) at offsets 0 and 6
; (GEP index 3 * 2 bytes), then the usual promoted f32 add and
; __truncsfbf2 truncation. Also exercises addressing of non-zero bf16
; element offsets.
define bfloat @bfloat_load(ptr %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_load:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: lh a1, 0(a0)
; RV32I-ILP32-NEXT: lh a2, 6(a0)
; RV32I-ILP32-NEXT: slli a0, a1, 16
; RV32I-ILP32-NEXT: slli a1, a2, 16
; RV32I-ILP32-NEXT: call __addsf3
; RV32I-ILP32-NEXT: call __truncsfbf2
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_load:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: lh a1, 0(a0)
; RV64I-LP64-NEXT: lh a2, 6(a0)
; RV64I-LP64-NEXT: slliw a0, a1, 16
; RV64I-LP64-NEXT: slliw a1, a2, 16
; RV64I-LP64-NEXT: call __addsf3
; RV64I-LP64-NEXT: call __truncsfbf2
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_load:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: lhu a1, 6(a0)
; RV32ID-ILP32-NEXT: lhu a0, 0(a0)
; RV32ID-ILP32-NEXT: slli a1, a1, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32-NEXT: slli a0, a0, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT: call __truncsfbf2
; RV32ID-ILP32-NEXT: lui a1, 1048560
; RV32ID-ILP32-NEXT: or a0, a0, a1
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_load:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: lhu a1, 6(a0)
; RV64ID-LP64-NEXT: lhu a0, 0(a0)
; RV64ID-LP64-NEXT: slli a1, a1, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64-NEXT: slli a0, a0, 16
; RV64ID-LP64-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT: fmv.x.w a0, fa5
; RV64ID-LP64-NEXT: call __truncsfbf2
; RV64ID-LP64-NEXT: lui a1, 1048560
; RV64ID-LP64-NEXT: or a0, a0, a1
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_load:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: lhu a1, 6(a0)
; RV32ID-ILP32D-NEXT: lhu a0, 0(a0)
; RV32ID-ILP32D-NEXT: slli a1, a1, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT: call __truncsfbf2
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: lui a1, 1048560
; RV32ID-ILP32D-NEXT: or a0, a0, a1
; RV32ID-ILP32D-NEXT: fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_load:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: lhu a1, 6(a0)
; RV64ID-LP64D-NEXT: lhu a0, 0(a0)
; RV64ID-LP64D-NEXT: slli a1, a1, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT: call __truncsfbf2
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 1048560
; RV64ID-LP64D-NEXT: or a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
  %1 = load bfloat, ptr %a
  %2 = getelementptr bfloat, ptr %a, i32 3
  %3 = load bfloat, ptr %2
  %4 = fadd bfloat %1, %3
  ret bfloat %4
}
; Storing bfloat to memory: the promoted-add + __truncsfbf2 result is
; stored as a half-word (sh) at offsets 0 and 16 (GEP index 8 * 2 bytes).
; The pointer is preserved across the libcalls in callee-saved s0, hence
; the extra s0 spill/reload in every variant.
define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
; RV32I-ILP32-LABEL: bfloat_store:
; RV32I-ILP32: # %bb.0:
; RV32I-ILP32-NEXT: addi sp, sp, -16
; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT: mv s0, a0
; RV32I-ILP32-NEXT: slli a0, a1, 16
; RV32I-ILP32-NEXT: slli a1, a2, 16
; RV32I-ILP32-NEXT: call __addsf3
; RV32I-ILP32-NEXT: call __truncsfbf2
; RV32I-ILP32-NEXT: sh a0, 0(s0)
; RV32I-ILP32-NEXT: sh a0, 16(s0)
; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT: addi sp, sp, 16
; RV32I-ILP32-NEXT: ret
;
; RV64I-LP64-LABEL: bfloat_store:
; RV64I-LP64: # %bb.0:
; RV64I-LP64-NEXT: addi sp, sp, -16
; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT: mv s0, a0
; RV64I-LP64-NEXT: slliw a0, a1, 16
; RV64I-LP64-NEXT: slliw a1, a2, 16
; RV64I-LP64-NEXT: call __addsf3
; RV64I-LP64-NEXT: call __truncsfbf2
; RV64I-LP64-NEXT: sh a0, 0(s0)
; RV64I-LP64-NEXT: sh a0, 16(s0)
; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT: addi sp, sp, 16
; RV64I-LP64-NEXT: ret
;
; RV32ID-ILP32-LABEL: bfloat_store:
; RV32ID-ILP32: # %bb.0:
; RV32ID-ILP32-NEXT: addi sp, sp, -16
; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: mv s0, a0
; RV32ID-ILP32-NEXT: slli a2, a2, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a2
; RV32ID-ILP32-NEXT: slli a1, a1, 16
; RV32ID-ILP32-NEXT: fmv.w.x fa4, a1
; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT: call __truncsfbf2
; RV32ID-ILP32-NEXT: sh a0, 0(s0)
; RV32ID-ILP32-NEXT: sh a0, 16(s0)
; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT: addi sp, sp, 16
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: bfloat_store:
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: mv s0, a0
; RV64ID-LP64-NEXT: slli a2, a2, 16
; RV64ID-LP64-NEXT: fmv.w.x fa5, a2
; RV64ID-LP64-NEXT: slli a1, a1, 16
; RV64ID-LP64-NEXT: fmv.w.x fa4, a1
; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT: fmv.x.w a0, fa5
; RV64ID-LP64-NEXT: call __truncsfbf2
; RV64ID-LP64-NEXT: sh a0, 0(s0)
; RV64ID-LP64-NEXT: sh a0, 16(s0)
; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT: addi sp, sp, 16
; RV64ID-LP64-NEXT: ret
;
; RV32ID-ILP32D-LABEL: bfloat_store:
; RV32ID-ILP32D: # %bb.0:
; RV32ID-ILP32D-NEXT: addi sp, sp, -16
; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT: mv s0, a0
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: fmv.x.w a1, fa1
; RV32ID-ILP32D-NEXT: slli a1, a1, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT: slli a0, a0, 16
; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT: call __truncsfbf2
; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT: sh a0, 0(s0)
; RV32ID-ILP32D-NEXT: sh a0, 16(s0)
; RV32ID-ILP32D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT: addi sp, sp, 16
; RV32ID-ILP32D-NEXT: ret
;
; RV64ID-LP64D-LABEL: bfloat_store:
; RV64ID-LP64D: # %bb.0:
; RV64ID-LP64D-NEXT: addi sp, sp, -16
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: mv s0, a0
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: fmv.x.w a1, fa1
; RV64ID-LP64D-NEXT: slli a1, a1, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT: slli a0, a0, 16
; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT: call __truncsfbf2
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: sh a0, 0(s0)
; RV64ID-LP64D-NEXT: sh a0, 16(s0)
; RV64ID-LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT: addi sp, sp, 16
; RV64ID-LP64D-NEXT: ret
  %1 = fadd bfloat %b, %c
  store bfloat %1, ptr %a
  %2 = getelementptr bfloat, ptr %a, i32 8
  store bfloat %1, ptr %2
  ret void
}