; llvm/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -target-abi=lp64d < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -target-abi=lp64d \
; RUN: -riscv-disable-frm-insert-opt < %s | FileCheck %s --check-prefix=UNOPT

declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
  <vscale x 1 x float>,
  <vscale x 1 x float>,
  <vscale x 1 x float>,
  i64, i64)

; Test only save/restore frm once.
define <vscale x 1 x float> @test(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: test:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    fsrmi a1, 0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: test:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    fsrmi a1, 0
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    fsrm a1
; UNOPT-NEXT:    fsrmi a0, 0
; UNOPT-NEXT:    vfadd.vv v8, v8, v8
; UNOPT-NEXT:    fsrm a0
; UNOPT-NEXT:    ret
entry:
  ; Both vfadds carry the same static rounding-mode immediate (0), so the
  ; optimized output emits a single fsrmi/fsrm save-restore pair around both,
  ; while the unoptimized (UNOPT) output saves/restores per instruction.
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 0, i64 %2)
  %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %a,
    <vscale x 1 x float> %a,
    i64 0, i64 %2)
  ret <vscale x 1 x float> %b
}

; Test only restore frm once.
define <vscale x 1 x float> @test2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    fsrmi a1, 0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    fsrmi 1
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: test2:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    fsrmi a1, 0
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    fsrmi 1
; UNOPT-NEXT:    fsrmi a0, 1
; UNOPT-NEXT:    vfadd.vv v8, v8, v8
; UNOPT-NEXT:    fsrm a0
; UNOPT-NEXT:    ret
entry:
  ; The two vfadds use different static rounding modes (0, then 1).  The
  ; optimized output switches with a destination-less "fsrmi 1" (old frm
  ; already saved in a1) and restores the caller's frm only once at the end.
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 0, i64 %2)
  %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %a,
    <vscale x 1 x float> %a,
    i64 1, i64 %2)
  ret <vscale x 1 x float> %b
}

declare void @foo()
define <vscale x 1 x float> @just_call(<vscale x 1 x float> %0) nounwind {
; CHECK-LABEL: just_call:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -48
; CHECK-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    addi a0, sp, 32
; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    call foo
; CHECK-NEXT:    addi a0, sp, 32
; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 48
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: just_call:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    addi sp, sp, -48
; UNOPT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; UNOPT-NEXT:    csrr a0, vlenb
; UNOPT-NEXT:    sub sp, sp, a0
; UNOPT-NEXT:    addi a0, sp, 32
; UNOPT-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; UNOPT-NEXT:    call foo
; UNOPT-NEXT:    addi a0, sp, 32
; UNOPT-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; UNOPT-NEXT:    csrr a0, vlenb
; UNOPT-NEXT:    add sp, sp, a0
; UNOPT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; UNOPT-NEXT:    addi sp, sp, 48
; UNOPT-NEXT:    ret
entry:
  ; No rounding-mode-using vector op here: the checks confirm that a bare
  ; call produces no fsrmi/fsrm insertion at all.
  call void @foo()
  ret <vscale x 1 x float> %0
}

define <vscale x 1 x float> @before_call1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: before_call1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -48
; CHECK-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    fsrmi a1, 0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    addi a0, sp, 32
; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    call foo
; CHECK-NEXT:    addi a0, sp, 32
; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 48
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: before_call1:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    addi sp, sp, -48
; UNOPT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; UNOPT-NEXT:    csrr a1, vlenb
; UNOPT-NEXT:    sub sp, sp, a1
; UNOPT-NEXT:    fsrmi a1, 0
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    addi a0, sp, 32
; UNOPT-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; UNOPT-NEXT:    fsrm a1
; UNOPT-NEXT:    call foo
; UNOPT-NEXT:    addi a0, sp, 32
; UNOPT-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; UNOPT-NEXT:    csrr a0, vlenb
; UNOPT-NEXT:    add sp, sp, a0
; UNOPT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; UNOPT-NEXT:    addi sp, sp, 48
; UNOPT-NEXT:    ret
entry:
  ; Static rounding mode (0) followed by an outgoing call: the checks verify
  ; frm is restored (fsrm a1) before "call foo".
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 0, i64 %2)
  call void @foo()
  ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @before_call2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: before_call2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -48
; CHECK-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    addi a0, sp, 32
; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    call foo
; CHECK-NEXT:    addi a0, sp, 32
; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 48
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: before_call2:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    addi sp, sp, -48
; UNOPT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; UNOPT-NEXT:    csrr a1, vlenb
; UNOPT-NEXT:    sub sp, sp, a1
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    addi a0, sp, 32
; UNOPT-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; UNOPT-NEXT:    call foo
; UNOPT-NEXT:    addi a0, sp, 32
; UNOPT-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; UNOPT-NEXT:    csrr a0, vlenb
; UNOPT-NEXT:    add sp, sp, a0
; UNOPT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; UNOPT-NEXT:    addi sp, sp, 48
; UNOPT-NEXT:    ret
entry:
  ; Rounding-mode operand 7 (DYN): frm is left alone, so no fsrmi/fsrm
  ; appears in either output even with a following call.
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 7, i64 %2)
  call void @foo()
  ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @after_call1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_call1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -48
; CHECK-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    fsrmi a1, 0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    addi a0, sp, 32
; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    call foo
; CHECK-NEXT:    addi a0, sp, 32
; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 48
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: after_call1:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    addi sp, sp, -48
; UNOPT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; UNOPT-NEXT:    csrr a1, vlenb
; UNOPT-NEXT:    sub sp, sp, a1
; UNOPT-NEXT:    fsrmi a1, 0
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    addi a0, sp, 32
; UNOPT-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; UNOPT-NEXT:    fsrm a1
; UNOPT-NEXT:    call foo
; UNOPT-NEXT:    addi a0, sp, 32
; UNOPT-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; UNOPT-NEXT:    csrr a0, vlenb
; UNOPT-NEXT:    add sp, sp, a0
; UNOPT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; UNOPT-NEXT:    addi sp, sp, 48
; UNOPT-NEXT:    ret
entry:
  ; NOTE(review): this IR is identical to @before_call1 (the vfadd precedes
  ; the call), despite the "after_call" name — the call was probably intended
  ; to come first. If the IR is reordered, the CHECK lines must be
  ; regenerated with update_llc_test_checks.py.
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 0, i64 %2)
  call void @foo()
  ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @after_call2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_call2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -48
; CHECK-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    addi a0, sp, 32
; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    call foo
; CHECK-NEXT:    addi a0, sp, 32
; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 48
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: after_call2:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    addi sp, sp, -48
; UNOPT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; UNOPT-NEXT:    csrr a1, vlenb
; UNOPT-NEXT:    sub sp, sp, a1
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    addi a0, sp, 32
; UNOPT-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; UNOPT-NEXT:    call foo
; UNOPT-NEXT:    addi a0, sp, 32
; UNOPT-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; UNOPT-NEXT:    csrr a0, vlenb
; UNOPT-NEXT:    add sp, sp, a0
; UNOPT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; UNOPT-NEXT:    addi sp, sp, 48
; UNOPT-NEXT:    ret
entry:
  ; DYN rounding mode (7): no FRM insertion. NOTE(review): IR is identical
  ; to @before_call2 despite the "after_call" name — verify the intended
  ; ordering and regenerate checks if the IR changes.
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 7, i64 %2)
  call void @foo()
  ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @just_asm(<vscale x 1 x float> %0) nounwind {
; CHECK-LABEL: just_asm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    #APP
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: just_asm:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    #APP
; UNOPT-NEXT:    #NO_APP
; UNOPT-NEXT:    ret
entry:
  ; Empty side-effecting inline asm, no FRM-using op: nothing is inserted.
  call void asm sideeffect "", ""()
  ret <vscale x 1 x float> %0
}

define <vscale x 1 x float> @before_asm1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: before_asm1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    fsrmi a1, 0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    #APP
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: before_asm1:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    fsrmi a1, 0
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    fsrm a1
; UNOPT-NEXT:    #APP
; UNOPT-NEXT:    #NO_APP
; UNOPT-NEXT:    ret
entry:
  ; Static rounding mode (0) followed by inline asm: the checks verify frm
  ; is restored before the #APP/#NO_APP region.
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 0, i64 %2)
  call void asm sideeffect "", ""()
  ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @before_asm2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: before_asm2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    #APP
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: before_asm2:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    #APP
; UNOPT-NEXT:    #NO_APP
; UNOPT-NEXT:    ret
entry:
  ; DYN rounding mode (7): no FRM insertion before or after the inline asm.
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 7, i64 %2)
  call void asm sideeffect "", ""()
  ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @after_asm1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_asm1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    fsrmi a1, 0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    #APP
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: after_asm1:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    fsrmi a1, 0
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    fsrm a1
; UNOPT-NEXT:    #APP
; UNOPT-NEXT:    #NO_APP
; UNOPT-NEXT:    ret
entry:
  ; NOTE(review): IR is identical to @before_asm1 (vfadd precedes the asm)
  ; despite the "after_asm" name — the asm was probably intended to come
  ; first. Regenerate checks if the IR is reordered.
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 0, i64 %2)
  call void asm sideeffect "", ""()
  ret <vscale x 1 x float> %a
}

define <vscale x 1 x float> @after_asm2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_asm2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    #APP
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: after_asm2:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    #APP
; UNOPT-NEXT:    #NO_APP
; UNOPT-NEXT:    ret
entry:
  ; DYN rounding mode (7): no FRM insertion. NOTE(review): IR is identical
  ; to @before_asm2 despite the "after_asm" name — verify intended ordering.
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 7, i64 %2)
  call void asm sideeffect "", ""()
  ret <vscale x 1 x float> %a
}

; Test restoring frm before reading frm and doing nothing with following
; dynamic rounding mode operations.
; TODO: The frrm could be elided.
declare i32 @llvm.get.rounding()
define <vscale x 1 x float> @test5(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2, ptr %p) nounwind {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    fsrmi a2, 0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    fsrm a2
; CHECK-NEXT:    frrm a0
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    lui a2, 66
; CHECK-NEXT:    addiw a2, a2, 769
; CHECK-NEXT:    srl a0, a2, a0
; CHECK-NEXT:    andi a0, a0, 7
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    sw a0, 0(a1)
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: test5:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    fsrmi a2, 0
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    fsrm a2
; UNOPT-NEXT:    frrm a0
; UNOPT-NEXT:    slli a0, a0, 2
; UNOPT-NEXT:    lui a2, 66
; UNOPT-NEXT:    addiw a2, a2, 769
; UNOPT-NEXT:    srl a0, a2, a0
; UNOPT-NEXT:    andi a0, a0, 7
; UNOPT-NEXT:    vfadd.vv v8, v8, v8
; UNOPT-NEXT:    sw a0, 0(a1)
; UNOPT-NEXT:    ret
entry:
  ; frm must be restored (fsrm a2) before llvm.get.rounding reads it via
  ; frrm; the trailing DYN (7) vfadd then needs no further FRM handling.
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 0, i64 %2)
  %rm = call i32 @llvm.get.rounding()
  store i32 %rm, ptr %p, align 4
  %b = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %a,
    <vscale x 1 x float> %a,
    i64 7, i64 %2)
  ret <vscale x 1 x float> %b
}

; Test that FRM is not set for vfadd with DYN after WriteFRMImm.
declare void @llvm.set.rounding(i32)
define <vscale x 1 x float> @after_fsrm1(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_fsrm1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    fsrmi 4
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: after_fsrm1:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    fsrmi 4
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    ret
entry:
  ; llvm.set.rounding(4) lowers to "fsrmi 4"; the following vfadd uses DYN
  ; (7), so no save/restore is emitted around it in either output.
  call void @llvm.set.rounding(i32 4)
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 7, i64 %2)
  ret <vscale x 1 x float> %a
}

; Test that FRM is not set for vfadd with a known rm after WriteFRMImm with the same rm.
define <vscale x 1 x float> @after_fsrm2(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_fsrm2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    fsrmi 4
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: after_fsrm2:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    fsrmi 4
; UNOPT-NEXT:    fsrmi a1, 4
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    fsrm a1
; UNOPT-NEXT:    ret
entry:
  ; The vfadd's static rm (4) matches the preceding set.rounding(4): the
  ; optimized output elides the redundant save/set/restore; UNOPT keeps it.
  call void @llvm.set.rounding(i32 4)
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 4, i64 %2)
  ret <vscale x 1 x float> %a
}

; Test that FRM is still set for vfadd with a known rm after WriteFRMImm with a different rm.
define <vscale x 1 x float> @after_fsrm3(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
; CHECK-LABEL: after_fsrm3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    fsrmi 4
; CHECK-NEXT:    fsrmi a1, 5
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: after_fsrm3:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    fsrmi 4
; UNOPT-NEXT:    fsrmi a1, 5
; UNOPT-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    fsrm a1
; UNOPT-NEXT:    ret
entry:
  ; The vfadd's static rm (5) differs from set.rounding(4), so both outputs
  ; must still save frm, switch to 5, and restore afterwards.
  call void @llvm.set.rounding(i32 4)
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 5, i64 %2)
  ret <vscale x 1 x float> %a
}

; Test not set FRM for the vfadd after WriteFRM.
define <vscale x 1 x float> @after_fsrm4(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %rm, i64 %2) nounwind {
; CHECK-LABEL: after_fsrm4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    slli a0, a0, 32
; CHECK-NEXT:    srli a0, a0, 30
; CHECK-NEXT:    lui a2, 66
; CHECK-NEXT:    addiw a2, a2, 769
; CHECK-NEXT:    srl a0, a2, a0
; CHECK-NEXT:    andi a0, a0, 7
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; UNOPT-LABEL: after_fsrm4:
; UNOPT:       # %bb.0: # %entry
; UNOPT-NEXT:    slli a0, a0, 32
; UNOPT-NEXT:    srli a0, a0, 30
; UNOPT-NEXT:    lui a2, 66
; UNOPT-NEXT:    addiw a2, a2, 769
; UNOPT-NEXT:    srl a0, a2, a0
; UNOPT-NEXT:    andi a0, a0, 7
; UNOPT-NEXT:    fsrm a0
; UNOPT-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; UNOPT-NEXT:    vfadd.vv v8, v8, v9
; UNOPT-NEXT:    ret
entry:
  ; set.rounding with a runtime mode lowers to a shift-based lookup plus
  ; "fsrm a0" (WriteFRM); the following DYN (7) vfadd adds no save/restore.
  call void @llvm.set.rounding(i32 %rm)
  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
    <vscale x 1 x float> undef,
    <vscale x 1 x float> %0,
    <vscale x 1 x float> %1,
    i64 7, i64 %2)
  ret <vscale x 1 x float> %a
}