llvm/llvm/test/CodeGen/RISCV/pr69586.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+xsfvcp \
; RUN:   -riscv-use-rematerializable-movimm=false | FileCheck %s --check-prefix=NOREMAT
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+xsfvcp \
; RUN:   -riscv-use-rematerializable-movimm=true | FileCheck %s --check-prefix=REMAT

define void @test(ptr %0, ptr %1, i64 %2) {
; NOREMAT-LABEL: test:
; NOREMAT:       # %bb.0:
; NOREMAT-NEXT:    addi sp, sp, -400
; NOREMAT-NEXT:    .cfi_def_cfa_offset 400
; NOREMAT-NEXT:    sd ra, 392(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s0, 384(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s1, 376(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s2, 368(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s3, 360(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s4, 352(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s5, 344(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s6, 336(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s7, 328(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s8, 320(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s9, 312(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s10, 304(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    sd s11, 296(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    .cfi_offset ra, -8
; NOREMAT-NEXT:    .cfi_offset s0, -16
; NOREMAT-NEXT:    .cfi_offset s1, -24
; NOREMAT-NEXT:    .cfi_offset s2, -32
; NOREMAT-NEXT:    .cfi_offset s3, -40
; NOREMAT-NEXT:    .cfi_offset s4, -48
; NOREMAT-NEXT:    .cfi_offset s5, -56
; NOREMAT-NEXT:    .cfi_offset s6, -64
; NOREMAT-NEXT:    .cfi_offset s7, -72
; NOREMAT-NEXT:    .cfi_offset s8, -80
; NOREMAT-NEXT:    .cfi_offset s9, -88
; NOREMAT-NEXT:    .cfi_offset s10, -96
; NOREMAT-NEXT:    .cfi_offset s11, -104
; NOREMAT-NEXT:    csrr a2, vlenb
; NOREMAT-NEXT:    li a3, 6
; NOREMAT-NEXT:    mul a2, a2, a3
; NOREMAT-NEXT:    sub sp, sp, a2
; NOREMAT-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x03, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 400 + 6 * vlenb
; NOREMAT-NEXT:    li a2, 32
; NOREMAT-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; NOREMAT-NEXT:    vle32.v v8, (a0)
; NOREMAT-NEXT:    addi a2, a0, 512
; NOREMAT-NEXT:    vle32.v v10, (a2)
; NOREMAT-NEXT:    addi a2, a0, 1024
; NOREMAT-NEXT:    vle32.v v12, (a2)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v10
; NOREMAT-NEXT:    vle32.v v8, (a2)
; NOREMAT-NEXT:    addi a2, a0, 1536
; NOREMAT-NEXT:    vle32.v v14, (a2)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT:    vle32.v v10, (a2)
; NOREMAT-NEXT:    li a2, 1
; NOREMAT-NEXT:    slli a2, a2, 11
; NOREMAT-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a2, a0, a2
; NOREMAT-NEXT:    vle32.v v12, (a2)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT:    vle32.v v8, (a2)
; NOREMAT-NEXT:    li a5, 5
; NOREMAT-NEXT:    slli a2, a5, 9
; NOREMAT-NEXT:    sd a2, 264(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a2, a0, a2
; NOREMAT-NEXT:    vle32.v v14, (a2)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT:    vle32.v v10, (a2)
; NOREMAT-NEXT:    li a2, 3
; NOREMAT-NEXT:    slli a3, a2, 10
; NOREMAT-NEXT:    sd a3, 256(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a3, a0, a3
; NOREMAT-NEXT:    vle32.v v12, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT:    vle32.v v8, (a3)
; NOREMAT-NEXT:    li a4, 7
; NOREMAT-NEXT:    slli a3, a4, 9
; NOREMAT-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a3, a0, a3
; NOREMAT-NEXT:    vle32.v v14, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT:    vle32.v v10, (a3)
; NOREMAT-NEXT:    lui a3, 1
; NOREMAT-NEXT:    add a3, a0, a3
; NOREMAT-NEXT:    vle32.v v12, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT:    vle32.v v8, (a3)
; NOREMAT-NEXT:    li a3, 9
; NOREMAT-NEXT:    slli a6, a3, 9
; NOREMAT-NEXT:    sd a6, 240(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a6, a0, a6
; NOREMAT-NEXT:    vle32.v v14, (a6)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT:    vle32.v v10, (a6)
; NOREMAT-NEXT:    slli a6, a5, 10
; NOREMAT-NEXT:    sd a6, 232(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a6, a0, a6
; NOREMAT-NEXT:    vle32.v v12, (a6)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT:    vle32.v v8, (a6)
; NOREMAT-NEXT:    li s8, 11
; NOREMAT-NEXT:    slli a6, s8, 9
; NOREMAT-NEXT:    sd a6, 224(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a6, a0, a6
; NOREMAT-NEXT:    vle32.v v14, (a6)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT:    vle32.v v10, (a6)
; NOREMAT-NEXT:    slli a2, a2, 11
; NOREMAT-NEXT:    sd a2, 216(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a2, a0, a2
; NOREMAT-NEXT:    vle32.v v12, (a2)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT:    vle32.v v8, (a2)
; NOREMAT-NEXT:    li s2, 13
; NOREMAT-NEXT:    slli a2, s2, 9
; NOREMAT-NEXT:    sd a2, 208(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a2, a0, a2
; NOREMAT-NEXT:    vle32.v v14, (a2)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT:    vle32.v v10, (a2)
; NOREMAT-NEXT:    slli a2, a4, 10
; NOREMAT-NEXT:    sd a2, 200(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a2, a0, a2
; NOREMAT-NEXT:    vle32.v v12, (a2)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT:    vle32.v v8, (a2)
; NOREMAT-NEXT:    li a2, 15
; NOREMAT-NEXT:    slli a6, a2, 9
; NOREMAT-NEXT:    sd a6, 192(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a6, a0, a6
; NOREMAT-NEXT:    vle32.v v26, (a6)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT:    vle32.v v16, (a6)
; NOREMAT-NEXT:    lui a6, 2
; NOREMAT-NEXT:    add a6, a0, a6
; NOREMAT-NEXT:    vle32.v v28, (a6)
; NOREMAT-NEXT:    vle32.v v10, (a6)
; NOREMAT-NEXT:    li a6, 17
; NOREMAT-NEXT:    slli a6, a6, 9
; NOREMAT-NEXT:    sd a6, 184(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    li t0, 17
; NOREMAT-NEXT:    add a6, a0, a6
; NOREMAT-NEXT:    vle32.v v30, (a6)
; NOREMAT-NEXT:    vle32.v v18, (a6)
; NOREMAT-NEXT:    slli a6, a3, 10
; NOREMAT-NEXT:    sd a6, 176(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a6, a0, a6
; NOREMAT-NEXT:    vle32.v v6, (a6)
; NOREMAT-NEXT:    vle32.v v20, (a6)
; NOREMAT-NEXT:    li a6, 19
; NOREMAT-NEXT:    slli a6, a6, 9
; NOREMAT-NEXT:    sd a6, 168(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    li a7, 19
; NOREMAT-NEXT:    add a6, a0, a6
; NOREMAT-NEXT:    vle32.v v4, (a6)
; NOREMAT-NEXT:    vle32.v v22, (a6)
; NOREMAT-NEXT:    slli a5, a5, 11
; NOREMAT-NEXT:    sd a5, 160(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a5, a0, a5
; NOREMAT-NEXT:    vle32.v v2, (a5)
; NOREMAT-NEXT:    vle32.v v12, (a5)
; NOREMAT-NEXT:    li s10, 21
; NOREMAT-NEXT:    slli a5, s10, 9
; NOREMAT-NEXT:    sd a5, 152(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a5, a0, a5
; NOREMAT-NEXT:    vle32.v v24, (a5)
; NOREMAT-NEXT:    vle32.v v14, (a5)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
; NOREMAT-NEXT:    slli a5, s8, 10
; NOREMAT-NEXT:    sd a5, 144(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a5, a0, a5
; NOREMAT-NEXT:    vle32.v v26, (a5)
; NOREMAT-NEXT:    vle32.v v8, (a5)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v28
; NOREMAT-NEXT:    li s6, 23
; NOREMAT-NEXT:    slli a5, s6, 9
; NOREMAT-NEXT:    sd a5, 136(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a5, a0, a5
; NOREMAT-NEXT:    vle32.v v28, (a5)
; NOREMAT-NEXT:    vle32.v v16, (a5)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v30
; NOREMAT-NEXT:    lui a5, 3
; NOREMAT-NEXT:    add a5, a0, a5
; NOREMAT-NEXT:    vle32.v v30, (a5)
; NOREMAT-NEXT:    vle32.v v10, (a5)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v6
; NOREMAT-NEXT:    li s3, 25
; NOREMAT-NEXT:    slli a5, s3, 9
; NOREMAT-NEXT:    sd a5, 128(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a5, a0, a5
; NOREMAT-NEXT:    vle32.v v6, (a5)
; NOREMAT-NEXT:    vle32.v v18, (a5)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v4
; NOREMAT-NEXT:    slli a5, s2, 10
; NOREMAT-NEXT:    sd a5, 120(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a5, a0, a5
; NOREMAT-NEXT:    vle32.v v4, (a5)
; NOREMAT-NEXT:    vle32.v v20, (a5)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v2
; NOREMAT-NEXT:    li t5, 27
; NOREMAT-NEXT:    slli a5, t5, 9
; NOREMAT-NEXT:    sd a5, 112(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a5, a0, a5
; NOREMAT-NEXT:    vle32.v v2, (a5)
; NOREMAT-NEXT:    vle32.v v22, (a5)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v24
; NOREMAT-NEXT:    slli a4, a4, 11
; NOREMAT-NEXT:    sd a4, 104(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a4, a0, a4
; NOREMAT-NEXT:    vle32.v v24, (a4)
; NOREMAT-NEXT:    vle32.v v12, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v26
; NOREMAT-NEXT:    li t2, 29
; NOREMAT-NEXT:    slli a4, t2, 9
; NOREMAT-NEXT:    sd a4, 96(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a4, a0, a4
; NOREMAT-NEXT:    vle32.v v26, (a4)
; NOREMAT-NEXT:    vle32.v v14, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v28
; NOREMAT-NEXT:    slli a4, a2, 10
; NOREMAT-NEXT:    sd a4, 88(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a4, a0, a4
; NOREMAT-NEXT:    vle32.v v28, (a4)
; NOREMAT-NEXT:    vle32.v v8, (a4)
; NOREMAT-NEXT:    csrr a4, vlenb
; NOREMAT-NEXT:    slli a4, a4, 2
; NOREMAT-NEXT:    add a4, sp, a4
; NOREMAT-NEXT:    addi a4, a4, 288
; NOREMAT-NEXT:    vs2r.v v8, (a4) # Unknown-size Folded Spill
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v30
; NOREMAT-NEXT:    li a5, 31
; NOREMAT-NEXT:    slli a4, a5, 9
; NOREMAT-NEXT:    sd a4, 80(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a4, a0, a4
; NOREMAT-NEXT:    vle32.v v30, (a4)
; NOREMAT-NEXT:    vle32.v v16, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v6
; NOREMAT-NEXT:    lui a6, 4
; NOREMAT-NEXT:    add a4, a0, a6
; NOREMAT-NEXT:    vle32.v v6, (a4)
; NOREMAT-NEXT:    vle32.v v8, (a4)
; NOREMAT-NEXT:    csrr a4, vlenb
; NOREMAT-NEXT:    slli a4, a4, 1
; NOREMAT-NEXT:    add a4, sp, a4
; NOREMAT-NEXT:    addi a4, a4, 288
; NOREMAT-NEXT:    vs2r.v v8, (a4) # Unknown-size Folded Spill
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v4
; NOREMAT-NEXT:    addiw a4, a6, 512
; NOREMAT-NEXT:    sd a4, 72(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a4, a0, a4
; NOREMAT-NEXT:    vle32.v v4, (a4)
; NOREMAT-NEXT:    vle32.v v18, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v2
; NOREMAT-NEXT:    slli a4, t0, 10
; NOREMAT-NEXT:    sd a4, 64(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a4, a0, a4
; NOREMAT-NEXT:    vle32.v v2, (a4)
; NOREMAT-NEXT:    vle32.v v20, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v24
; NOREMAT-NEXT:    addiw a4, a6, 1536
; NOREMAT-NEXT:    sd a4, 56(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a4, a0, a4
; NOREMAT-NEXT:    vle32.v v0, (a4)
; NOREMAT-NEXT:    vle32.v v22, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v26
; NOREMAT-NEXT:    slli a3, a3, 11
; NOREMAT-NEXT:    sd a3, 48(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a3, a0, a3
; NOREMAT-NEXT:    vle32.v v12, (a3)
; NOREMAT-NEXT:    vle32.v v8, (a3)
; NOREMAT-NEXT:    addi a3, sp, 288
; NOREMAT-NEXT:    vs2r.v v8, (a3) # Unknown-size Folded Spill
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v28
; NOREMAT-NEXT:    lui s1, 5
; NOREMAT-NEXT:    addiw a3, s1, -1536
; NOREMAT-NEXT:    sd a3, 40(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a3, a0, a3
; NOREMAT-NEXT:    vle32.v v8, (a3)
; NOREMAT-NEXT:    vle32.v v24, (a3)
; NOREMAT-NEXT:    csrr a3, vlenb
; NOREMAT-NEXT:    slli a3, a3, 2
; NOREMAT-NEXT:    add a3, sp, a3
; NOREMAT-NEXT:    addi a3, a3, 288
; NOREMAT-NEXT:    vl2r.v v10, (a3) # Unknown-size Folded Reload
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v30
; NOREMAT-NEXT:    slli a3, a7, 10
; NOREMAT-NEXT:    sd a3, 32(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a3, a0, a3
; NOREMAT-NEXT:    vle32.v v10, (a3)
; NOREMAT-NEXT:    vle32.v v14, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v6
; NOREMAT-NEXT:    addiw a3, s1, -512
; NOREMAT-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
; NOREMAT-NEXT:    add a3, a0, a3
; NOREMAT-NEXT:    vle32.v v6, (a3)
; NOREMAT-NEXT:    vle32.v v16, (a3)
; NOREMAT-NEXT:    csrr a3, vlenb
; NOREMAT-NEXT:    slli a3, a3, 1
; NOREMAT-NEXT:    add a3, sp, a3
; NOREMAT-NEXT:    addi a3, a3, 288
; NOREMAT-NEXT:    vl2r.v v26, (a3) # Unknown-size Folded Reload
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v4
; NOREMAT-NEXT:    add a3, a0, s1
; NOREMAT-NEXT:    vle32.v v26, (a3)
; NOREMAT-NEXT:    vle32.v v28, (a3)
; NOREMAT-NEXT:    csrr a3, vlenb
; NOREMAT-NEXT:    slli a3, a3, 2
; NOREMAT-NEXT:    add a3, sp, a3
; NOREMAT-NEXT:    addi a3, a3, 288
; NOREMAT-NEXT:    vs2r.v v28, (a3) # Unknown-size Folded Spill
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v2
; NOREMAT-NEXT:    addiw ra, s1, 512
; NOREMAT-NEXT:    add a3, a0, ra
; NOREMAT-NEXT:    vle32.v v28, (a3)
; NOREMAT-NEXT:    vle32.v v30, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v0
; NOREMAT-NEXT:    slli s11, s10, 10
; NOREMAT-NEXT:    add a3, a0, s11
; NOREMAT-NEXT:    vle32.v v4, (a3)
; NOREMAT-NEXT:    vle32.v v18, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v12
; NOREMAT-NEXT:    addiw s10, s1, 1536
; NOREMAT-NEXT:    add a3, a0, s10
; NOREMAT-NEXT:    vle32.v v2, (a3)
; NOREMAT-NEXT:    vle32.v v20, (a3)
; NOREMAT-NEXT:    addi a3, sp, 288
; NOREMAT-NEXT:    vl2r.v v12, (a3) # Unknown-size Folded Reload
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v8
; NOREMAT-NEXT:    slli s9, s8, 11
; NOREMAT-NEXT:    add a3, a0, s9
; NOREMAT-NEXT:    vle32.v v0, (a3)
; NOREMAT-NEXT:    vle32.v v12, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v10
; NOREMAT-NEXT:    lui t0, 6
; NOREMAT-NEXT:    addiw s8, t0, -1536
; NOREMAT-NEXT:    add a3, a0, s8
; NOREMAT-NEXT:    vle32.v v8, (a3)
; NOREMAT-NEXT:    vle32.v v22, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v6
; NOREMAT-NEXT:    slli s7, s6, 10
; NOREMAT-NEXT:    add a3, a0, s7
; NOREMAT-NEXT:    vle32.v v10, (a3)
; NOREMAT-NEXT:    vle32.v v14, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v26
; NOREMAT-NEXT:    addiw s6, t0, -512
; NOREMAT-NEXT:    add a3, a0, s6
; NOREMAT-NEXT:    vle32.v v6, (a3)
; NOREMAT-NEXT:    vle32.v v16, (a3)
; NOREMAT-NEXT:    csrr a3, vlenb
; NOREMAT-NEXT:    slli a3, a3, 2
; NOREMAT-NEXT:    add a3, sp, a3
; NOREMAT-NEXT:    addi a3, a3, 288
; NOREMAT-NEXT:    vl2r.v v24, (a3) # Unknown-size Folded Reload
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v28
; NOREMAT-NEXT:    add a3, a0, t0
; NOREMAT-NEXT:    vle32.v v24, (a3)
; NOREMAT-NEXT:    vle32.v v26, (a3)
; NOREMAT-NEXT:    csrr a3, vlenb
; NOREMAT-NEXT:    slli a3, a3, 2
; NOREMAT-NEXT:    add a3, sp, a3
; NOREMAT-NEXT:    addi a3, a3, 288
; NOREMAT-NEXT:    vs2r.v v26, (a3) # Unknown-size Folded Spill
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v4
; NOREMAT-NEXT:    addiw s5, t0, 512
; NOREMAT-NEXT:    add a3, a0, s5
; NOREMAT-NEXT:    vle32.v v26, (a3)
; NOREMAT-NEXT:    vle32.v v28, (a3)
; NOREMAT-NEXT:    csrr a3, vlenb
; NOREMAT-NEXT:    slli a3, a3, 1
; NOREMAT-NEXT:    add a3, sp, a3
; NOREMAT-NEXT:    addi a3, a3, 288
; NOREMAT-NEXT:    vs2r.v v28, (a3) # Unknown-size Folded Spill
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v2
; NOREMAT-NEXT:    slli s4, s3, 10
; NOREMAT-NEXT:    add a3, a0, s4
; NOREMAT-NEXT:    vle32.v v28, (a3)
; NOREMAT-NEXT:    vle32.v v18, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v0
; NOREMAT-NEXT:    addiw s3, t0, 1536
; NOREMAT-NEXT:    add a3, a0, s3
; NOREMAT-NEXT:    vle32.v v30, (a3)
; NOREMAT-NEXT:    vle32.v v20, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v8
; NOREMAT-NEXT:    slli s2, s2, 11
; NOREMAT-NEXT:    add a3, a0, s2
; NOREMAT-NEXT:    vle32.v v4, (a3)
; NOREMAT-NEXT:    vle32.v v12, (a3)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v10
; NOREMAT-NEXT:    lui a3, 7
; NOREMAT-NEXT:    addiw s0, a3, -1536
; NOREMAT-NEXT:    add a4, a0, s0
; NOREMAT-NEXT:    vle32.v v2, (a4)
; NOREMAT-NEXT:    vle32.v v22, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v6
; NOREMAT-NEXT:    slli t6, t5, 10
; NOREMAT-NEXT:    add a4, a0, t6
; NOREMAT-NEXT:    vle32.v v0, (a4)
; NOREMAT-NEXT:    vle32.v v14, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v24
; NOREMAT-NEXT:    addiw t5, a3, -512
; NOREMAT-NEXT:    add a4, a0, t5
; NOREMAT-NEXT:    vle32.v v6, (a4)
; NOREMAT-NEXT:    vle32.v v16, (a4)
; NOREMAT-NEXT:    csrr a4, vlenb
; NOREMAT-NEXT:    slli a4, a4, 2
; NOREMAT-NEXT:    add a4, sp, a4
; NOREMAT-NEXT:    addi a4, a4, 288
; NOREMAT-NEXT:    vl2r.v v8, (a4) # Unknown-size Folded Reload
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
; NOREMAT-NEXT:    add a4, a0, a3
; NOREMAT-NEXT:    vle32.v v26, (a4)
; NOREMAT-NEXT:    vle32.v v8, (a4)
; NOREMAT-NEXT:    csrr a4, vlenb
; NOREMAT-NEXT:    slli a4, a4, 1
; NOREMAT-NEXT:    add a4, sp, a4
; NOREMAT-NEXT:    addi a4, a4, 288
; NOREMAT-NEXT:    vl2r.v v10, (a4) # Unknown-size Folded Reload
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v28
; NOREMAT-NEXT:    addiw t4, a3, 512
; NOREMAT-NEXT:    add a4, a0, t4
; NOREMAT-NEXT:    vle32.v v10, (a4)
; NOREMAT-NEXT:    vle32.v v24, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v30
; NOREMAT-NEXT:    slli t3, t2, 10
; NOREMAT-NEXT:    add a4, a0, t3
; NOREMAT-NEXT:    vle32.v v18, (a4)
; NOREMAT-NEXT:    vle32.v v28, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v4
; NOREMAT-NEXT:    addiw t2, a3, 1536
; NOREMAT-NEXT:    add a4, a0, t2
; NOREMAT-NEXT:    vle32.v v20, (a4)
; NOREMAT-NEXT:    vle32.v v30, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v2
; NOREMAT-NEXT:    slli t1, a2, 11
; NOREMAT-NEXT:    add a2, a0, t1
; NOREMAT-NEXT:    vle32.v v12, (a2)
; NOREMAT-NEXT:    vle32.v v4, (a2)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v0
; NOREMAT-NEXT:    lui a2, 8
; NOREMAT-NEXT:    addiw a7, a2, -1536
; NOREMAT-NEXT:    add a4, a0, a7
; NOREMAT-NEXT:    vle32.v v22, (a4)
; NOREMAT-NEXT:    vle32.v v2, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v6
; NOREMAT-NEXT:    slli a6, a5, 10
; NOREMAT-NEXT:    add a4, a0, a6
; NOREMAT-NEXT:    vle32.v v14, (a4)
; NOREMAT-NEXT:    vle32.v v6, (a4)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v26
; NOREMAT-NEXT:    addiw a5, a2, -512
; NOREMAT-NEXT:    add a4, a0, a5
; NOREMAT-NEXT:    vle32.v v16, (a4)
; NOREMAT-NEXT:    vle32.v v26, (a4)
; NOREMAT-NEXT:    add a0, a0, a2
; NOREMAT-NEXT:    vle32.v v0, (a0)
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v10
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v18
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v28, v20
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v12
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v4, v22
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v2, v14
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v6, v16
; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v0
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    addi a0, a1, 1024
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    addi a0, a1, 1536
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 272(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 264(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    lui a0, 1
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    lui a0, 2
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    lui a0, 3
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 80(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    lui a0, 4
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s1, a1, s1
; NOREMAT-NEXT:    vse32.v v8, (s1)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add ra, a1, ra
; NOREMAT-NEXT:    vse32.v v8, (ra)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s11, a1, s11
; NOREMAT-NEXT:    vse32.v v8, (s11)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s10, a1, s10
; NOREMAT-NEXT:    vse32.v v8, (s10)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s9, a1, s9
; NOREMAT-NEXT:    vse32.v v8, (s9)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s8, a1, s8
; NOREMAT-NEXT:    vse32.v v8, (s8)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s7, a1, s7
; NOREMAT-NEXT:    vse32.v v8, (s7)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s6, a1, s6
; NOREMAT-NEXT:    vse32.v v8, (s6)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add t0, a1, t0
; NOREMAT-NEXT:    vse32.v v8, (t0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s5, a1, s5
; NOREMAT-NEXT:    vse32.v v8, (s5)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s4, a1, s4
; NOREMAT-NEXT:    vse32.v v8, (s4)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s3, a1, s3
; NOREMAT-NEXT:    vse32.v v8, (s3)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s2, a1, s2
; NOREMAT-NEXT:    vse32.v v8, (s2)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add s0, a1, s0
; NOREMAT-NEXT:    vse32.v v8, (s0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add t6, a1, t6
; NOREMAT-NEXT:    vse32.v v8, (t6)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add t5, a1, t5
; NOREMAT-NEXT:    vse32.v v8, (t5)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add a3, a1, a3
; NOREMAT-NEXT:    vse32.v v8, (a3)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add t4, a1, t4
; NOREMAT-NEXT:    vse32.v v8, (t4)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add t3, a1, t3
; NOREMAT-NEXT:    vse32.v v8, (t3)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add t2, a1, t2
; NOREMAT-NEXT:    vse32.v v8, (t2)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add t1, a1, t1
; NOREMAT-NEXT:    vse32.v v8, (t1)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add a7, a1, a7
; NOREMAT-NEXT:    vse32.v v8, (a7)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add a6, a1, a6
; NOREMAT-NEXT:    vse32.v v8, (a6)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    add a5, a1, a5
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT:    vse32.v v8, (a5)
; NOREMAT-NEXT:    add a0, a1, a2
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    vse32.v v10, (a0)
; NOREMAT-NEXT:    addiw a0, a2, 512
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    addiw a0, a2, 1024
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    vse32.v v10, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    addiw a0, a2, 1536
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    li a0, 17
; NOREMAT-NEXT:    slli a0, a0, 11
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    vse32.v v10, (a0)
; NOREMAT-NEXT:    lui a0, 9
; NOREMAT-NEXT:    addiw a2, a0, -1536
; NOREMAT-NEXT:    add a2, a1, a2
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT:    vse32.v v8, (a2)
; NOREMAT-NEXT:    addiw a2, a0, -1024
; NOREMAT-NEXT:    add a2, a1, a2
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    vse32.v v10, (a2)
; NOREMAT-NEXT:    addiw a2, a0, -512
; NOREMAT-NEXT:    add a2, a1, a2
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT:    vse32.v v8, (a2)
; NOREMAT-NEXT:    add a2, a1, a0
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    vse32.v v10, (a2)
; NOREMAT-NEXT:    addiw a2, a0, 512
; NOREMAT-NEXT:    add a2, a1, a2
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT:    vse32.v v8, (a2)
; NOREMAT-NEXT:    addiw a2, a0, 1024
; NOREMAT-NEXT:    add a2, a1, a2
; NOREMAT-NEXT:    vse32.v v10, (a2)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    addiw a0, a0, 1536
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    li a0, 19
; NOREMAT-NEXT:    slli a0, a0, 11
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    vse32.v v10, (a0)
; NOREMAT-NEXT:    lui a0, 10
; NOREMAT-NEXT:    addiw a2, a0, -1536
; NOREMAT-NEXT:    add a2, a1, a2
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT:    vse32.v v8, (a2)
; NOREMAT-NEXT:    addiw a2, a0, -1024
; NOREMAT-NEXT:    add a2, a1, a2
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    vse32.v v10, (a2)
; NOREMAT-NEXT:    addiw a2, a0, -512
; NOREMAT-NEXT:    add a2, a1, a2
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT:    vse32.v v8, (a2)
; NOREMAT-NEXT:    add a2, a1, a0
; NOREMAT-NEXT:    vse32.v v10, (a2)
; NOREMAT-NEXT:    addiw a0, a0, 512
; NOREMAT-NEXT:    add a0, a1, a0
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    vse32.v v8, (a0)
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT:    csrr a0, vlenb
; NOREMAT-NEXT:    li a1, 6
; NOREMAT-NEXT:    mul a0, a0, a1
; NOREMAT-NEXT:    add sp, sp, a0
; NOREMAT-NEXT:    ld ra, 392(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s0, 384(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s1, 376(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s2, 368(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s3, 360(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s4, 352(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s5, 344(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s6, 336(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s7, 328(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s8, 320(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s9, 312(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s10, 304(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    ld s11, 296(sp) # 8-byte Folded Reload
; NOREMAT-NEXT:    addi sp, sp, 400
; NOREMAT-NEXT:    ret
;
; REMAT-LABEL: test:
; REMAT:       # %bb.0:
; REMAT-NEXT:    addi sp, sp, -112
; REMAT-NEXT:    .cfi_def_cfa_offset 112
; REMAT-NEXT:    sd ra, 104(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s0, 96(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s1, 88(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s2, 80(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s3, 72(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s4, 64(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s5, 56(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s6, 48(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s7, 40(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s8, 32(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s9, 24(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s10, 16(sp) # 8-byte Folded Spill
; REMAT-NEXT:    sd s11, 8(sp) # 8-byte Folded Spill
; REMAT-NEXT:    .cfi_offset ra, -8
; REMAT-NEXT:    .cfi_offset s0, -16
; REMAT-NEXT:    .cfi_offset s1, -24
; REMAT-NEXT:    .cfi_offset s2, -32
; REMAT-NEXT:    .cfi_offset s3, -40
; REMAT-NEXT:    .cfi_offset s4, -48
; REMAT-NEXT:    .cfi_offset s5, -56
; REMAT-NEXT:    .cfi_offset s6, -64
; REMAT-NEXT:    .cfi_offset s7, -72
; REMAT-NEXT:    .cfi_offset s8, -80
; REMAT-NEXT:    .cfi_offset s9, -88
; REMAT-NEXT:    .cfi_offset s10, -96
; REMAT-NEXT:    .cfi_offset s11, -104
; REMAT-NEXT:    li a2, 32
; REMAT-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; REMAT-NEXT:    vle32.v v8, (a0)
; REMAT-NEXT:    addi a2, a0, 512
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    addi a2, a0, 1024
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v10
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    addi a2, a0, 1536
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    li a2, 1
; REMAT-NEXT:    slli a2, a2, 11
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    li a2, 5
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    li a2, 3
; REMAT-NEXT:    slli a2, a2, 10
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    li a2, 7
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    lui a2, 1
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    li a2, 9
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    li a2, 5
; REMAT-NEXT:    slli a2, a2, 10
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    li a2, 11
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    li a2, 3
; REMAT-NEXT:    slli a2, a2, 11
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    li a2, 13
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    li a2, 7
; REMAT-NEXT:    slli a2, a2, 10
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    li a2, 15
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    lui a2, 2
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    li a2, 17
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    li a2, 9
; REMAT-NEXT:    slli a2, a2, 10
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    li a2, 19
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    li a2, 5
; REMAT-NEXT:    slli a2, a2, 11
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    li a2, 21
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    li a2, 11
; REMAT-NEXT:    slli a2, a2, 10
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v26, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    li a2, 23
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v28, (a2)
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    lui a2, 3
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v30, (a2)
; REMAT-NEXT:    vle32.v v16, (a2)
; REMAT-NEXT:    li a2, 25
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v6, (a2)
; REMAT-NEXT:    vle32.v v18, (a2)
; REMAT-NEXT:    li a2, 13
; REMAT-NEXT:    slli a2, a2, 10
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v4, (a2)
; REMAT-NEXT:    vle32.v v20, (a2)
; REMAT-NEXT:    li a2, 27
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v2, (a2)
; REMAT-NEXT:    vle32.v v22, (a2)
; REMAT-NEXT:    li a2, 7
; REMAT-NEXT:    slli a2, a2, 11
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v24, (a2)
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v26
; REMAT-NEXT:    li a2, 29
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v26, (a2)
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v28
; REMAT-NEXT:    li a2, 15
; REMAT-NEXT:    slli a2, a2, 10
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v28, (a2)
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v30
; REMAT-NEXT:    li a2, 31
; REMAT-NEXT:    slli a2, a2, 9
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v30, (a2)
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v6
; REMAT-NEXT:    lui a2, 4
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v6, (a2)
; REMAT-NEXT:    vle32.v v16, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v4
; REMAT-NEXT:    lui a2, 4
; REMAT-NEXT:    addiw a2, a2, 512
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v4, (a2)
; REMAT-NEXT:    vle32.v v18, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v2
; REMAT-NEXT:    li a2, 17
; REMAT-NEXT:    slli a2, a2, 10
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v2, (a2)
; REMAT-NEXT:    vle32.v v20, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v24
; REMAT-NEXT:    lui a2, 4
; REMAT-NEXT:    addiw a2, a2, 1536
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v24, (a2)
; REMAT-NEXT:    vle32.v v22, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
; REMAT-NEXT:    li a2, 9
; REMAT-NEXT:    slli a2, a2, 11
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v26, (a2)
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v28
; REMAT-NEXT:    lui a2, 5
; REMAT-NEXT:    addiw a2, a2, -1536
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v28, (a2)
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v30
; REMAT-NEXT:    li a2, 19
; REMAT-NEXT:    slli a2, a2, 10
; REMAT-NEXT:    add a2, a0, a2
; REMAT-NEXT:    vle32.v v30, (a2)
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v6
; REMAT-NEXT:    lui ra, 5
; REMAT-NEXT:    addiw ra, ra, -512
; REMAT-NEXT:    add a2, a0, ra
; REMAT-NEXT:    vle32.v v6, (a2)
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v4
; REMAT-NEXT:    lui s11, 5
; REMAT-NEXT:    add a2, a0, s11
; REMAT-NEXT:    vle32.v v4, (a2)
; REMAT-NEXT:    vle32.v v16, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v2
; REMAT-NEXT:    lui s10, 5
; REMAT-NEXT:    addiw s10, s10, 512
; REMAT-NEXT:    add a2, a0, s10
; REMAT-NEXT:    vle32.v v2, (a2)
; REMAT-NEXT:    vle32.v v18, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v24
; REMAT-NEXT:    li s9, 21
; REMAT-NEXT:    slli s9, s9, 10
; REMAT-NEXT:    add a2, a0, s9
; REMAT-NEXT:    vle32.v v24, (a2)
; REMAT-NEXT:    vle32.v v20, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v26
; REMAT-NEXT:    lui s8, 5
; REMAT-NEXT:    addiw s8, s8, 1536
; REMAT-NEXT:    add a2, a0, s8
; REMAT-NEXT:    vle32.v v26, (a2)
; REMAT-NEXT:    vle32.v v22, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v28
; REMAT-NEXT:    li s7, 11
; REMAT-NEXT:    slli s7, s7, 11
; REMAT-NEXT:    add a2, a0, s7
; REMAT-NEXT:    vle32.v v28, (a2)
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v30
; REMAT-NEXT:    lui s6, 6
; REMAT-NEXT:    addiw s6, s6, -1536
; REMAT-NEXT:    add a2, a0, s6
; REMAT-NEXT:    vle32.v v30, (a2)
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v6
; REMAT-NEXT:    li s5, 23
; REMAT-NEXT:    slli s5, s5, 10
; REMAT-NEXT:    add a2, a0, s5
; REMAT-NEXT:    vle32.v v6, (a2)
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v4
; REMAT-NEXT:    lui s4, 6
; REMAT-NEXT:    addiw s4, s4, -512
; REMAT-NEXT:    add a2, a0, s4
; REMAT-NEXT:    vle32.v v4, (a2)
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v2
; REMAT-NEXT:    lui s3, 6
; REMAT-NEXT:    add a2, a0, s3
; REMAT-NEXT:    vle32.v v2, (a2)
; REMAT-NEXT:    vle32.v v16, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v24
; REMAT-NEXT:    lui s2, 6
; REMAT-NEXT:    addiw s2, s2, 512
; REMAT-NEXT:    add a2, a0, s2
; REMAT-NEXT:    vle32.v v0, (a2)
; REMAT-NEXT:    vle32.v v18, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v26
; REMAT-NEXT:    li s1, 25
; REMAT-NEXT:    slli s1, s1, 10
; REMAT-NEXT:    add a2, a0, s1
; REMAT-NEXT:    vle32.v v26, (a2)
; REMAT-NEXT:    vle32.v v20, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v28
; REMAT-NEXT:    lui s0, 6
; REMAT-NEXT:    addiw s0, s0, 1536
; REMAT-NEXT:    add a2, a0, s0
; REMAT-NEXT:    vle32.v v28, (a2)
; REMAT-NEXT:    vle32.v v22, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v30
; REMAT-NEXT:    li t6, 13
; REMAT-NEXT:    slli t6, t6, 11
; REMAT-NEXT:    add a2, a0, t6
; REMAT-NEXT:    vle32.v v30, (a2)
; REMAT-NEXT:    vle32.v v24, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v6
; REMAT-NEXT:    lui t5, 7
; REMAT-NEXT:    addiw t5, t5, -1536
; REMAT-NEXT:    add a2, a0, t5
; REMAT-NEXT:    vle32.v v6, (a2)
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v4
; REMAT-NEXT:    li t4, 27
; REMAT-NEXT:    slli t4, t4, 10
; REMAT-NEXT:    add a2, a0, t4
; REMAT-NEXT:    vle32.v v4, (a2)
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v2
; REMAT-NEXT:    lui t3, 7
; REMAT-NEXT:    addiw t3, t3, -512
; REMAT-NEXT:    add a2, a0, t3
; REMAT-NEXT:    vle32.v v2, (a2)
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v0
; REMAT-NEXT:    lui t2, 7
; REMAT-NEXT:    add a2, a0, t2
; REMAT-NEXT:    vle32.v v0, (a2)
; REMAT-NEXT:    vle32.v v8, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v26
; REMAT-NEXT:    lui t1, 7
; REMAT-NEXT:    addiw t1, t1, 512
; REMAT-NEXT:    add a2, a0, t1
; REMAT-NEXT:    vle32.v v16, (a2)
; REMAT-NEXT:    vle32.v v18, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v28
; REMAT-NEXT:    li t0, 29
; REMAT-NEXT:    slli t0, t0, 10
; REMAT-NEXT:    add a2, a0, t0
; REMAT-NEXT:    vle32.v v20, (a2)
; REMAT-NEXT:    vle32.v v26, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v30
; REMAT-NEXT:    lui a7, 7
; REMAT-NEXT:    addiw a7, a7, 1536
; REMAT-NEXT:    add a2, a0, a7
; REMAT-NEXT:    vle32.v v22, (a2)
; REMAT-NEXT:    vle32.v v28, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v24, v6
; REMAT-NEXT:    li a6, 15
; REMAT-NEXT:    slli a6, a6, 11
; REMAT-NEXT:    add a2, a0, a6
; REMAT-NEXT:    vle32.v v24, (a2)
; REMAT-NEXT:    vle32.v v30, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v4
; REMAT-NEXT:    lui a5, 8
; REMAT-NEXT:    addiw a5, a5, -1536
; REMAT-NEXT:    add a2, a0, a5
; REMAT-NEXT:    vle32.v v10, (a2)
; REMAT-NEXT:    vle32.v v6, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v2
; REMAT-NEXT:    li a4, 31
; REMAT-NEXT:    slli a4, a4, 10
; REMAT-NEXT:    add a2, a0, a4
; REMAT-NEXT:    vle32.v v12, (a2)
; REMAT-NEXT:    vle32.v v4, (a2)
; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v0
; REMAT-NEXT:    lui a3, 8
; REMAT-NEXT:    addiw a3, a3, -512
; REMAT-NEXT:    add a2, a0, a3
; REMAT-NEXT:    vle32.v v14, (a2)
; REMAT-NEXT:    vle32.v v2, (a2)
; REMAT-NEXT:    lui a2, 8
; REMAT-NEXT:    add a0, a0, a2
; REMAT-NEXT:    vle32.v v0, (a0)
; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v16
; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v20
; REMAT-NEXT:    sf.vc.vv 3, 0, v26, v22
; REMAT-NEXT:    sf.vc.vv 3, 0, v28, v24
; REMAT-NEXT:    sf.vc.vv 3, 0, v30, v10
; REMAT-NEXT:    sf.vc.vv 3, 0, v6, v12
; REMAT-NEXT:    sf.vc.vv 3, 0, v4, v14
; REMAT-NEXT:    sf.vc.vv 3, 0, v2, v0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    addi a0, a1, 1024
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    addi a0, a1, 1536
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 1
; REMAT-NEXT:    slli a0, a0, 11
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 5
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 3
; REMAT-NEXT:    slli a0, a0, 10
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 7
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    lui a0, 1
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 9
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 5
; REMAT-NEXT:    slli a0, a0, 10
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 11
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 3
; REMAT-NEXT:    slli a0, a0, 11
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 13
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 7
; REMAT-NEXT:    slli a0, a0, 10
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 15
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    lui a0, 2
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 17
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 9
; REMAT-NEXT:    slli a0, a0, 10
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 19
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 5
; REMAT-NEXT:    slli a0, a0, 11
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 21
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 11
; REMAT-NEXT:    slli a0, a0, 10
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 23
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    lui a0, 3
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 25
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 13
; REMAT-NEXT:    slli a0, a0, 10
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 27
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 7
; REMAT-NEXT:    slli a0, a0, 11
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 29
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 15
; REMAT-NEXT:    slli a0, a0, 10
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 31
; REMAT-NEXT:    slli a0, a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    lui a0, 4
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    lui a0, 4
; REMAT-NEXT:    addiw a0, a0, 512
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 17
; REMAT-NEXT:    slli a0, a0, 10
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    lui a0, 4
; REMAT-NEXT:    addiw a0, a0, 1536
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 9
; REMAT-NEXT:    slli a0, a0, 11
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    lui a0, 5
; REMAT-NEXT:    addiw a0, a0, -1536
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    li a0, 19
; REMAT-NEXT:    slli a0, a0, 10
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add ra, a1, ra
; REMAT-NEXT:    vse32.v v8, (ra)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s11, a1, s11
; REMAT-NEXT:    vse32.v v8, (s11)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s10, a1, s10
; REMAT-NEXT:    vse32.v v8, (s10)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s9, a1, s9
; REMAT-NEXT:    vse32.v v8, (s9)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s8, a1, s8
; REMAT-NEXT:    vse32.v v8, (s8)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s7, a1, s7
; REMAT-NEXT:    vse32.v v8, (s7)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s6, a1, s6
; REMAT-NEXT:    vse32.v v8, (s6)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s5, a1, s5
; REMAT-NEXT:    vse32.v v8, (s5)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s4, a1, s4
; REMAT-NEXT:    vse32.v v8, (s4)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s3, a1, s3
; REMAT-NEXT:    vse32.v v8, (s3)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s2, a1, s2
; REMAT-NEXT:    vse32.v v8, (s2)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s1, a1, s1
; REMAT-NEXT:    vse32.v v8, (s1)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add s0, a1, s0
; REMAT-NEXT:    vse32.v v8, (s0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add t6, a1, t6
; REMAT-NEXT:    vse32.v v8, (t6)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add t5, a1, t5
; REMAT-NEXT:    vse32.v v8, (t5)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add t4, a1, t4
; REMAT-NEXT:    vse32.v v8, (t4)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add t3, a1, t3
; REMAT-NEXT:    vse32.v v8, (t3)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add t2, a1, t2
; REMAT-NEXT:    vse32.v v8, (t2)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add t1, a1, t1
; REMAT-NEXT:    vse32.v v8, (t1)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add t0, a1, t0
; REMAT-NEXT:    vse32.v v8, (t0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add a7, a1, a7
; REMAT-NEXT:    vse32.v v8, (a7)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add a6, a1, a6
; REMAT-NEXT:    vse32.v v8, (a6)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add a5, a1, a5
; REMAT-NEXT:    vse32.v v8, (a5)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add a4, a1, a4
; REMAT-NEXT:    vse32.v v8, (a4)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add a3, a1, a3
; REMAT-NEXT:    vse32.v v8, (a3)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    add a2, a1, a2
; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT:    vse32.v v8, (a2)
; REMAT-NEXT:    lui a0, 8
; REMAT-NEXT:    addiw a0, a0, 512
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    vse32.v v10, (a0)
; REMAT-NEXT:    lui a0, 8
; REMAT-NEXT:    addiw a0, a0, 1024
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    lui a0, 8
; REMAT-NEXT:    addiw a0, a0, 1536
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    vse32.v v10, (a0)
; REMAT-NEXT:    li a0, 17
; REMAT-NEXT:    slli a0, a0, 11
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    lui a0, 9
; REMAT-NEXT:    addiw a0, a0, -1536
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    vse32.v v10, (a0)
; REMAT-NEXT:    lui a0, 9
; REMAT-NEXT:    addiw a0, a0, -1024
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    lui a0, 9
; REMAT-NEXT:    addiw a0, a0, -512
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    vse32.v v10, (a0)
; REMAT-NEXT:    lui a0, 9
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    lui a0, 9
; REMAT-NEXT:    addiw a0, a0, 512
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    vse32.v v10, (a0)
; REMAT-NEXT:    lui a0, 9
; REMAT-NEXT:    addiw a0, a0, 1024
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    lui a0, 9
; REMAT-NEXT:    addiw a0, a0, 1536
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    vse32.v v10, (a0)
; REMAT-NEXT:    li a0, 19
; REMAT-NEXT:    slli a0, a0, 11
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    lui a0, 10
; REMAT-NEXT:    addiw a0, a0, -1536
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    vse32.v v10, (a0)
; REMAT-NEXT:    lui a0, 10
; REMAT-NEXT:    addiw a0, a0, -1024
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    lui a0, 10
; REMAT-NEXT:    addiw a0, a0, -512
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    vse32.v v10, (a0)
; REMAT-NEXT:    lui a0, 10
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    lui a0, 10
; REMAT-NEXT:    addiw a0, a0, 512
; REMAT-NEXT:    add a0, a1, a0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    vse32.v v8, (a0)
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT:    ld ra, 104(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s1, 88(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s2, 80(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s3, 72(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s6, 48(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s7, 40(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s8, 32(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s9, 24(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s10, 16(sp) # 8-byte Folded Reload
; REMAT-NEXT:    ld s11, 8(sp) # 8-byte Folded Reload
; REMAT-NEXT:    addi sp, sp, 112
; REMAT-NEXT:    ret
  %4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 32, i64 2, i64 1)
  %5 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %0, i64 %4)
  %6 = getelementptr inbounds i32, ptr %0, i64 128
  %7 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %6, i64 %4)
  %8 = getelementptr inbounds i32, ptr %0, i64 256
  %9 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %8, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %5, <vscale x 4 x i32> %7, i64 %4)
  %10 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %8, i64 %4)
  %11 = getelementptr inbounds i32, ptr %0, i64 384
  %12 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %11, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %7, <vscale x 4 x i32> %9, i64 %4)
  %13 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %11, i64 %4)
  %14 = getelementptr inbounds i32, ptr %0, i64 512
  %15 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %14, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %10, <vscale x 4 x i32> %12, i64 %4)
  %16 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %14, i64 %4)
  %17 = getelementptr inbounds i32, ptr %0, i64 640
  %18 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %17, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %13, <vscale x 4 x i32> %15, i64 %4)
  %19 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %17, i64 %4)
  %20 = getelementptr inbounds i32, ptr %0, i64 768
  %21 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %20, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %16, <vscale x 4 x i32> %18, i64 %4)
  %22 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %20, i64 %4)
  %23 = getelementptr inbounds i32, ptr %0, i64 896
  %24 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %23, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %19, <vscale x 4 x i32> %21, i64 %4)
  %25 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %23, i64 %4)
  %26 = getelementptr inbounds i32, ptr %0, i64 1024
  %27 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %26, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %22, <vscale x 4 x i32> %24, i64 %4)
  %28 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %26, i64 %4)
  %29 = getelementptr inbounds i32, ptr %0, i64 1152
  %30 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %29, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %25, <vscale x 4 x i32> %27, i64 %4)
  %31 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %29, i64 %4)
  %32 = getelementptr inbounds i32, ptr %0, i64 1280
  %33 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %32, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %28, <vscale x 4 x i32> %30, i64 %4)
  %34 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %32, i64 %4)
  %35 = getelementptr inbounds i32, ptr %0, i64 1408
  %36 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %35, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %31, <vscale x 4 x i32> %33, i64 %4)
  %37 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %35, i64 %4)
  %38 = getelementptr inbounds i32, ptr %0, i64 1536
  %39 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %38, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %34, <vscale x 4 x i32> %36, i64 %4)
  %40 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %38, i64 %4)
  %41 = getelementptr inbounds i32, ptr %0, i64 1664
  %42 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %41, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %37, <vscale x 4 x i32> %39, i64 %4)
  %43 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %41, i64 %4)
  %44 = getelementptr inbounds i32, ptr %0, i64 1792
  %45 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %44, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %40, <vscale x 4 x i32> %42, i64 %4)
  %46 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %44, i64 %4)
  %47 = getelementptr inbounds i32, ptr %0, i64 1920
  %48 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %47, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %43, <vscale x 4 x i32> %45, i64 %4)
  %49 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %47, i64 %4)
  %50 = getelementptr inbounds i32, ptr %0, i64 2048
  %51 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %50, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %46, <vscale x 4 x i32> %48, i64 %4)
  %52 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %50, i64 %4)
  %53 = getelementptr inbounds i32, ptr %0, i64 2176
  %54 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %53, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %49, <vscale x 4 x i32> %51, i64 %4)
  %55 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %53, i64 %4)
  %56 = getelementptr inbounds i32, ptr %0, i64 2304
  %57 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %56, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %52, <vscale x 4 x i32> %54, i64 %4)
  %58 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %56, i64 %4)
  %59 = getelementptr inbounds i32, ptr %0, i64 2432
  %60 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %59, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %55, <vscale x 4 x i32> %57, i64 %4)
  %61 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %59, i64 %4)
  %62 = getelementptr inbounds i32, ptr %0, i64 2560
  %63 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %62, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %58, <vscale x 4 x i32> %60, i64 %4)
  %64 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %62, i64 %4)
  %65 = getelementptr inbounds i32, ptr %0, i64 2688
  %66 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %65, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %61, <vscale x 4 x i32> %63, i64 %4)
  %67 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %65, i64 %4)
  %68 = getelementptr inbounds i32, ptr %0, i64 2816
  %69 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %68, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %64, <vscale x 4 x i32> %66, i64 %4)
  %70 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %68, i64 %4)
  %71 = getelementptr inbounds i32, ptr %0, i64 2944
  %72 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %71, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %67, <vscale x 4 x i32> %69, i64 %4)
  %73 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %71, i64 %4)
  %74 = getelementptr inbounds i32, ptr %0, i64 3072
  %75 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %74, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %70, <vscale x 4 x i32> %72, i64 %4)
  %76 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %74, i64 %4)
  %77 = getelementptr inbounds i32, ptr %0, i64 3200
  %78 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %77, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %73, <vscale x 4 x i32> %75, i64 %4)
  %79 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %77, i64 %4)
  %80 = getelementptr inbounds i32, ptr %0, i64 3328
  %81 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %80, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %76, <vscale x 4 x i32> %78, i64 %4)
  %82 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %80, i64 %4)
  %83 = getelementptr inbounds i32, ptr %0, i64 3456
  %84 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %83, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %79, <vscale x 4 x i32> %81, i64 %4)
  %85 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %83, i64 %4)
  %86 = getelementptr inbounds i32, ptr %0, i64 3584
  %87 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %86, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %82, <vscale x 4 x i32> %84, i64 %4)
  %88 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %86, i64 %4)
  %89 = getelementptr inbounds i32, ptr %0, i64 3712
  %90 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %89, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %85, <vscale x 4 x i32> %87, i64 %4)
  %91 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %89, i64 %4)
  %92 = getelementptr inbounds i32, ptr %0, i64 3840
  %93 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %92, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %88, <vscale x 4 x i32> %90, i64 %4)
  %94 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %92, i64 %4)
  %95 = getelementptr inbounds i32, ptr %0, i64 3968
  %96 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %95, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %91, <vscale x 4 x i32> %93, i64 %4)
  %97 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %95, i64 %4)
  %98 = getelementptr inbounds i32, ptr %0, i64 4096
  %99 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %98, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %94, <vscale x 4 x i32> %96, i64 %4)
  %100 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %98, i64 %4)
  %101 = getelementptr inbounds i32, ptr %0, i64 4224
  %102 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %101, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %97, <vscale x 4 x i32> %99, i64 %4)
  %103 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %101, i64 %4)
  %104 = getelementptr inbounds i32, ptr %0, i64 4352
  %105 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %104, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %100, <vscale x 4 x i32> %102, i64 %4)
  %106 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %104, i64 %4)
  %107 = getelementptr inbounds i32, ptr %0, i64 4480
  %108 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %107, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %103, <vscale x 4 x i32> %105, i64 %4)
  %109 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %107, i64 %4)
  %110 = getelementptr inbounds i32, ptr %0, i64 4608
  %111 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %110, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %106, <vscale x 4 x i32> %108, i64 %4)
  %112 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %110, i64 %4)
  %113 = getelementptr inbounds i32, ptr %0, i64 4736
  %114 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %113, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %109, <vscale x 4 x i32> %111, i64 %4)
  %115 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %113, i64 %4)
  %116 = getelementptr inbounds i32, ptr %0, i64 4864
  %117 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %116, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %112, <vscale x 4 x i32> %114, i64 %4)
  %118 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %116, i64 %4)
  %119 = getelementptr inbounds i32, ptr %0, i64 4992
  %120 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %119, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %115, <vscale x 4 x i32> %117, i64 %4)
  %121 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %119, i64 %4)
  %122 = getelementptr inbounds i32, ptr %0, i64 5120
  %123 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %122, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %118, <vscale x 4 x i32> %120, i64 %4)
  %124 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %122, i64 %4)
  %125 = getelementptr inbounds i32, ptr %0, i64 5248
  %126 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %125, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %121, <vscale x 4 x i32> %123, i64 %4)
  %127 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %125, i64 %4)
  %128 = getelementptr inbounds i32, ptr %0, i64 5376
  %129 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %128, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %124, <vscale x 4 x i32> %126, i64 %4)
  %130 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %128, i64 %4)
  %131 = getelementptr inbounds i32, ptr %0, i64 5504
  %132 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %131, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %127, <vscale x 4 x i32> %129, i64 %4)
  %133 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %131, i64 %4)
  %134 = getelementptr inbounds i32, ptr %0, i64 5632
  %135 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %134, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %130, <vscale x 4 x i32> %132, i64 %4)
  %136 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %134, i64 %4)
  %137 = getelementptr inbounds i32, ptr %0, i64 5760
  %138 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %137, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %133, <vscale x 4 x i32> %135, i64 %4)
  %139 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %137, i64 %4)
  %140 = getelementptr inbounds i32, ptr %0, i64 5888
  %141 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %140, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %136, <vscale x 4 x i32> %138, i64 %4)
  %142 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %140, i64 %4)
  %143 = getelementptr inbounds i32, ptr %0, i64 6016
  %144 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %143, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %139, <vscale x 4 x i32> %141, i64 %4)
  %145 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %143, i64 %4)
  %146 = getelementptr inbounds i32, ptr %0, i64 6144
  %147 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %146, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %142, <vscale x 4 x i32> %144, i64 %4)
  %148 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %146, i64 %4)
  %149 = getelementptr inbounds i32, ptr %0, i64 6272
  %150 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %149, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %145, <vscale x 4 x i32> %147, i64 %4)
  %151 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %149, i64 %4)
  %152 = getelementptr inbounds i32, ptr %0, i64 6400
  %153 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %152, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %148, <vscale x 4 x i32> %150, i64 %4)
  %154 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %152, i64 %4)
  %155 = getelementptr inbounds i32, ptr %0, i64 6528
  %156 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %155, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %151, <vscale x 4 x i32> %153, i64 %4)
  %157 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %155, i64 %4)
  %158 = getelementptr inbounds i32, ptr %0, i64 6656
  %159 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %158, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %154, <vscale x 4 x i32> %156, i64 %4)
  %160 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %158, i64 %4)
  %161 = getelementptr inbounds i32, ptr %0, i64 6784
  %162 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %161, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %157, <vscale x 4 x i32> %159, i64 %4)
  %163 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %161, i64 %4)
  %164 = getelementptr inbounds i32, ptr %0, i64 6912
  %165 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %164, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %160, <vscale x 4 x i32> %162, i64 %4)
  %166 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %164, i64 %4)
  %167 = getelementptr inbounds i32, ptr %0, i64 7040
  %168 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %167, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %163, <vscale x 4 x i32> %165, i64 %4)
  %169 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %167, i64 %4)
  %170 = getelementptr inbounds i32, ptr %0, i64 7168
  %171 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %170, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %166, <vscale x 4 x i32> %168, i64 %4)
  %172 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %170, i64 %4)
  %173 = getelementptr inbounds i32, ptr %0, i64 7296
  %174 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %173, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %169, <vscale x 4 x i32> %171, i64 %4)
  %175 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %173, i64 %4)
  %176 = getelementptr inbounds i32, ptr %0, i64 7424
  %177 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %176, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %172, <vscale x 4 x i32> %174, i64 %4)
  %178 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %176, i64 %4)
  %179 = getelementptr inbounds i32, ptr %0, i64 7552
  %180 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %179, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %175, <vscale x 4 x i32> %177, i64 %4)
  %181 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %179, i64 %4)
  %182 = getelementptr inbounds i32, ptr %0, i64 7680
  %183 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %182, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %178, <vscale x 4 x i32> %180, i64 %4)
  %184 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %182, i64 %4)
  %185 = getelementptr inbounds i32, ptr %0, i64 7808
  %186 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %185, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %181, <vscale x 4 x i32> %183, i64 %4)
  %187 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %185, i64 %4)
  %188 = getelementptr inbounds i32, ptr %0, i64 7936
  %189 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %188, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %184, <vscale x 4 x i32> %186, i64 %4)
  %190 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %188, i64 %4)
  %191 = getelementptr inbounds i32, ptr %0, i64 8064
  %192 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %191, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %187, <vscale x 4 x i32> %189, i64 %4)
  %193 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %191, i64 %4)
  %194 = getelementptr inbounds i32, ptr %0, i64 8192
  %195 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %194, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %190, <vscale x 4 x i32> %192, i64 %4)
  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %193, <vscale x 4 x i32> %195, i64 %4)
  %196 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  %197 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  %198 = getelementptr inbounds i32, ptr %1, i64 256
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %196, ptr %198, i64 %4)
  %199 = getelementptr inbounds i32, ptr %1, i64 384
  %200 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %197, ptr %199, i64 %4)
  %201 = getelementptr inbounds i32, ptr %1, i64 512
  %202 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %200, ptr %201, i64 %4)
  %203 = getelementptr inbounds i32, ptr %1, i64 640
  %204 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %202, ptr %203, i64 %4)
  %205 = getelementptr inbounds i32, ptr %1, i64 768
  %206 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %204, ptr %205, i64 %4)
  %207 = getelementptr inbounds i32, ptr %1, i64 896
  %208 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %206, ptr %207, i64 %4)
  %209 = getelementptr inbounds i32, ptr %1, i64 1024
  %210 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %208, ptr %209, i64 %4)
  %211 = getelementptr inbounds i32, ptr %1, i64 1152
  %212 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %210, ptr %211, i64 %4)
  %213 = getelementptr inbounds i32, ptr %1, i64 1280
  %214 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %212, ptr %213, i64 %4)
  %215 = getelementptr inbounds i32, ptr %1, i64 1408
  %216 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %214, ptr %215, i64 %4)
  %217 = getelementptr inbounds i32, ptr %1, i64 1536
  %218 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %216, ptr %217, i64 %4)
  %219 = getelementptr inbounds i32, ptr %1, i64 1664
  %220 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %218, ptr %219, i64 %4)
  %221 = getelementptr inbounds i32, ptr %1, i64 1792
  %222 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %220, ptr %221, i64 %4)
  %223 = getelementptr inbounds i32, ptr %1, i64 1920
  %224 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %222, ptr %223, i64 %4)
  %225 = getelementptr inbounds i32, ptr %1, i64 2048
  %226 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %224, ptr %225, i64 %4)
  %227 = getelementptr inbounds i32, ptr %1, i64 2176
  %228 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %226, ptr %227, i64 %4)
  %229 = getelementptr inbounds i32, ptr %1, i64 2304
  %230 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %228, ptr %229, i64 %4)
  %231 = getelementptr inbounds i32, ptr %1, i64 2432
  %232 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %230, ptr %231, i64 %4)
  %233 = getelementptr inbounds i32, ptr %1, i64 2560
  %234 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %232, ptr %233, i64 %4)
  %235 = getelementptr inbounds i32, ptr %1, i64 2688
  %236 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %234, ptr %235, i64 %4)
  %237 = getelementptr inbounds i32, ptr %1, i64 2816
  %238 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %236, ptr %237, i64 %4)
  %239 = getelementptr inbounds i32, ptr %1, i64 2944
  %240 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %238, ptr %239, i64 %4)
  %241 = getelementptr inbounds i32, ptr %1, i64 3072
  %242 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %240, ptr %241, i64 %4)
  %243 = getelementptr inbounds i32, ptr %1, i64 3200
  %244 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %242, ptr %243, i64 %4)
  %245 = getelementptr inbounds i32, ptr %1, i64 3328
  %246 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %244, ptr %245, i64 %4)
  %247 = getelementptr inbounds i32, ptr %1, i64 3456
  %248 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %246, ptr %247, i64 %4)
  %249 = getelementptr inbounds i32, ptr %1, i64 3584
  %250 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %248, ptr %249, i64 %4)
  %251 = getelementptr inbounds i32, ptr %1, i64 3712
  %252 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %250, ptr %251, i64 %4)
  %253 = getelementptr inbounds i32, ptr %1, i64 3840
  %254 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %252, ptr %253, i64 %4)
  %255 = getelementptr inbounds i32, ptr %1, i64 3968
  %256 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %254, ptr %255, i64 %4)
  %257 = getelementptr inbounds i32, ptr %1, i64 4096
  %258 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %256, ptr %257, i64 %4)
  %259 = getelementptr inbounds i32, ptr %1, i64 4224
  %260 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %258, ptr %259, i64 %4)
  %261 = getelementptr inbounds i32, ptr %1, i64 4352
  %262 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %260, ptr %261, i64 %4)
  %263 = getelementptr inbounds i32, ptr %1, i64 4480
  %264 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %262, ptr %263, i64 %4)
  %265 = getelementptr inbounds i32, ptr %1, i64 4608
  %266 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %264, ptr %265, i64 %4)
  %267 = getelementptr inbounds i32, ptr %1, i64 4736
  %268 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %266, ptr %267, i64 %4)
  %269 = getelementptr inbounds i32, ptr %1, i64 4864
  %270 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %268, ptr %269, i64 %4)
  %271 = getelementptr inbounds i32, ptr %1, i64 4992
  %272 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %270, ptr %271, i64 %4)
  %273 = getelementptr inbounds i32, ptr %1, i64 5120
  %274 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %272, ptr %273, i64 %4)
  %275 = getelementptr inbounds i32, ptr %1, i64 5248
  %276 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %274, ptr %275, i64 %4)
  %277 = getelementptr inbounds i32, ptr %1, i64 5376
  %278 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %276, ptr %277, i64 %4)
  %279 = getelementptr inbounds i32, ptr %1, i64 5504
  %280 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %278, ptr %279, i64 %4)
  %281 = getelementptr inbounds i32, ptr %1, i64 5632
  %282 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %280, ptr %281, i64 %4)
  %283 = getelementptr inbounds i32, ptr %1, i64 5760
  %284 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %282, ptr %283, i64 %4)
  %285 = getelementptr inbounds i32, ptr %1, i64 5888
  %286 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %284, ptr %285, i64 %4)
  %287 = getelementptr inbounds i32, ptr %1, i64 6016
  %288 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %286, ptr %287, i64 %4)
  %289 = getelementptr inbounds i32, ptr %1, i64 6144
  %290 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %288, ptr %289, i64 %4)
  %291 = getelementptr inbounds i32, ptr %1, i64 6272
  %292 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %290, ptr %291, i64 %4)
  %293 = getelementptr inbounds i32, ptr %1, i64 6400
  %294 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %292, ptr %293, i64 %4)
  %295 = getelementptr inbounds i32, ptr %1, i64 6528
  %296 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %294, ptr %295, i64 %4)
  %297 = getelementptr inbounds i32, ptr %1, i64 6656
  %298 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %296, ptr %297, i64 %4)
  %299 = getelementptr inbounds i32, ptr %1, i64 6784
  %300 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %298, ptr %299, i64 %4)
  %301 = getelementptr inbounds i32, ptr %1, i64 6912
  %302 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %300, ptr %301, i64 %4)
  %303 = getelementptr inbounds i32, ptr %1, i64 7040
  %304 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %302, ptr %303, i64 %4)
  %305 = getelementptr inbounds i32, ptr %1, i64 7168
  %306 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %304, ptr %305, i64 %4)
  %307 = getelementptr inbounds i32, ptr %1, i64 7296
  %308 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %306, ptr %307, i64 %4)
  %309 = getelementptr inbounds i32, ptr %1, i64 7424
  %310 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %308, ptr %309, i64 %4)
  %311 = getelementptr inbounds i32, ptr %1, i64 7552
  %312 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %310, ptr %311, i64 %4)
  %313 = getelementptr inbounds i32, ptr %1, i64 7680
  %314 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %312, ptr %313, i64 %4)
  %315 = getelementptr inbounds i32, ptr %1, i64 7808
  %316 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %314, ptr %315, i64 %4)
  %317 = getelementptr inbounds i32, ptr %1, i64 7936
  %318 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %316, ptr %317, i64 %4)
  %319 = getelementptr inbounds i32, ptr %1, i64 8064
  %320 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %318, ptr %319, i64 %4)
  %321 = getelementptr inbounds i32, ptr %1, i64 8192
  %322 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %320, ptr %321, i64 %4)
  %323 = getelementptr inbounds i32, ptr %1, i64 8320
  %324 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %322, ptr %323, i64 %4)
  %325 = getelementptr inbounds i32, ptr %1, i64 8448
  %326 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %324, ptr %325, i64 %4)
  %327 = getelementptr inbounds i32, ptr %1, i64 8576
  %328 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %326, ptr %327, i64 %4)
  %329 = getelementptr inbounds i32, ptr %1, i64 8704
  %330 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %328, ptr %329, i64 %4)
  %331 = getelementptr inbounds i32, ptr %1, i64 8832
  %332 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %330, ptr %331, i64 %4)
  %333 = getelementptr inbounds i32, ptr %1, i64 8960
  %334 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %332, ptr %333, i64 %4)
  %335 = getelementptr inbounds i32, ptr %1, i64 9088
  %336 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %334, ptr %335, i64 %4)
  %337 = getelementptr inbounds i32, ptr %1, i64 9216
  %338 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %336, ptr %337, i64 %4)
  %339 = getelementptr inbounds i32, ptr %1, i64 9344
  %340 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %338, ptr %339, i64 %4)
  %341 = getelementptr inbounds i32, ptr %1, i64 9472
  %342 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %340, ptr %341, i64 %4)
  %343 = getelementptr inbounds i32, ptr %1, i64 9600
  %344 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %342, ptr %343, i64 %4)
  %345 = getelementptr inbounds i32, ptr %1, i64 9728
  %346 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %344, ptr %345, i64 %4)
  %347 = getelementptr inbounds i32, ptr %1, i64 9856
  %348 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %346, ptr %347, i64 %4)
  %349 = getelementptr inbounds i32, ptr %1, i64 9984
  %350 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %348, ptr %349, i64 %4)
  %351 = getelementptr inbounds i32, ptr %1, i64 10112
  %352 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %350, ptr %351, i64 %4)
  %353 = getelementptr inbounds i32, ptr %1, i64 10240
  %354 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %352, ptr %353, i64 %4)
  %355 = getelementptr inbounds i32, ptr %1, i64 10368
  %356 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %354, ptr %355, i64 %4)
  %357 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
  ret void
}

declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>, ptr, i64)
declare void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64, i64, <vscale x 4 x i32>, <vscale x 4 x i32>, i64)
declare <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64, i64, i64, i64)
declare void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32>, ptr, i64)