; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv64 -mattr=+v,+c < %s | FileCheck %s
; This previously crashed when spilling a GPR: a dead ADDI was deleted without
; also being removed from the LiveIntervals (LIS) instruction map. Needs +c to
; trigger.
; Regression reproducer, not meaningful code. The body is five nested loops
; whose exits are driven by the i1 arguments %arg.1, %arg.3 and %arg.5,
; followed by an indirect call through a null pointer and a small fixed-vector
; tail that reads %arg.10. %arg.2, %arg.4, %arg.7, %arg.8 and %arg.9 are never
; read in the body.
; vscale_range(2,2) pins vscale to exactly 2.
; The assertion lines directly below the signature are autogenerated (see the
; first line of the file); regenerate them with the script instead of editing
; them by hand.
define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscale x 4 x i1> %arg.6, i64 %arg.7, i1 %arg.8, i64 %arg.9, i32 %arg.10) vscale_range(2,2) {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -112
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: .cfi_offset s1, -24
; CHECK-NEXT: .cfi_offset s2, -32
; CHECK-NEXT: .cfi_offset s3, -40
; CHECK-NEXT: .cfi_offset s4, -48
; CHECK-NEXT: .cfi_offset s5, -56
; CHECK-NEXT: .cfi_offset s6, -64
; CHECK-NEXT: .cfi_offset s7, -72
; CHECK-NEXT: .cfi_offset s8, -80
; CHECK-NEXT: .cfi_offset s9, -88
; CHECK-NEXT: .cfi_offset s10, -96
; CHECK-NEXT: .cfi_offset s11, -104
; CHECK-NEXT: li s2, 0
; CHECK-NEXT: li a7, 8
; CHECK-NEXT: li t0, 12
; CHECK-NEXT: li s0, 4
; CHECK-NEXT: li t1, 20
; CHECK-NEXT: ld a1, 112(sp)
; CHECK-NEXT: sd a1, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: andi t3, a4, 1
; CHECK-NEXT: li t2, 4
; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader.i
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-NEXT: # Child Loop BB0_3 Depth 3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
; CHECK-NEXT: mv t4, t1
; CHECK-NEXT: mv t5, t2
; CHECK-NEXT: mv t6, t0
; CHECK-NEXT: mv s3, a7
; CHECK-NEXT: mv a6, s2
; CHECK-NEXT: .LBB0_2: # %for.cond5.preheader.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Loop Header: Depth=2
; CHECK-NEXT: # Child Loop BB0_3 Depth 3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
; CHECK-NEXT: mv s5, t4
; CHECK-NEXT: mv s6, t5
; CHECK-NEXT: mv s7, t6
; CHECK-NEXT: mv s8, s3
; CHECK-NEXT: mv s4, a6
; CHECK-NEXT: .LBB0_3: # %for.cond9.preheader.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # => This Loop Header: Depth=3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
; CHECK-NEXT: mv s11, s5
; CHECK-NEXT: mv a3, s6
; CHECK-NEXT: mv ra, s7
; CHECK-NEXT: mv a4, s8
; CHECK-NEXT: mv s9, s4
; CHECK-NEXT: .LBB0_4: # %vector.ph.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # Parent Loop BB0_3 Depth=3
; CHECK-NEXT: # => This Loop Header: Depth=4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
; CHECK-NEXT: li a5, 0
; CHECK-NEXT: .LBB0_5: # %vector.body.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # Parent Loop BB0_3 Depth=3
; CHECK-NEXT: # Parent Loop BB0_4 Depth=4
; CHECK-NEXT: # => This Inner Loop Header: Depth=5
; CHECK-NEXT: addi s1, a5, 4
; CHECK-NEXT: add a1, a4, a5
; CHECK-NEXT: vse32.v v8, (a1), v0.t
; CHECK-NEXT: add a5, a5, a3
; CHECK-NEXT: vse32.v v8, (a5), v0.t
; CHECK-NEXT: mv a5, s1
; CHECK-NEXT: bne s1, s0, .LBB0_5
; CHECK-NEXT: # %bb.6: # %for.cond.cleanup15.i
; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=4
; CHECK-NEXT: addi s9, s9, 4
; CHECK-NEXT: addi a4, a4, 4
; CHECK-NEXT: addi ra, ra, 4
; CHECK-NEXT: addi a3, a3, 4
; CHECK-NEXT: andi s10, a0, 1
; CHECK-NEXT: addi s11, s11, 4
; CHECK-NEXT: beqz s10, .LBB0_4
; CHECK-NEXT: # %bb.7: # %for.cond.cleanup11.i
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=3
; CHECK-NEXT: addi s4, s4, 4
; CHECK-NEXT: addi s8, s8, 4
; CHECK-NEXT: addi s7, s7, 4
; CHECK-NEXT: addi s6, s6, 4
; CHECK-NEXT: andi a1, a2, 1
; CHECK-NEXT: addi s5, s5, 4
; CHECK-NEXT: beqz a1, .LBB0_3
; CHECK-NEXT: # %bb.8: # %for.cond.cleanup7.i
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=2
; CHECK-NEXT: addi a6, a6, 4
; CHECK-NEXT: addi s3, s3, 4
; CHECK-NEXT: addi t6, t6, 4
; CHECK-NEXT: addi t5, t5, 4
; CHECK-NEXT: addi t4, t4, 4
; CHECK-NEXT: beqz t3, .LBB0_2
; CHECK-NEXT: # %bb.9: # %for.cond.cleanup3.i
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: addi s2, s2, 4
; CHECK-NEXT: addi a7, a7, 4
; CHECK-NEXT: addi t0, t0, 4
; CHECK-NEXT: addi t2, t2, 4
; CHECK-NEXT: addi t1, t1, 4
; CHECK-NEXT: beqz a1, .LBB0_1
; CHECK-NEXT: # %bb.10: # %l.exit
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: jalr a0
; CHECK-NEXT: beqz s10, .LBB0_12
; CHECK-NEXT: .LBB0_11: # %for.body7.us.14
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: j .LBB0_11
; CHECK-NEXT: .LBB0_12: # %for.body7.us.19
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v16, v8, 1
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vmsne.vi v8, v16, 0
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: sb a0, 0(zero)
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 112
; CHECK-NEXT: ret
entry:
; %0 is the step vector; it seeds the vector induction phi %vec.ind.i in
; vector.body.i.
%0 = tail call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
br label %for.cond1.preheader.i
; Loop level 1 (outermost) header; %arg.21 is its counter.
for.cond1.preheader.i: ; preds = %for.cond.cleanup3.i, %entry
%arg.21 = phi i64 [ 0, %entry ], [ %indvars.iv.next74.i, %for.cond.cleanup3.i ]
br label %for.cond5.preheader.i
; Loop level 2 header; %arg.42 is its counter and %1 sums the counters of
; levels 1 and 2.
for.cond5.preheader.i: ; preds = %for.cond.cleanup7.i, %for.cond1.preheader.i
%arg.42 = phi i64 [ 0, %for.cond1.preheader.i ], [ %indvars.iv.next70.i, %for.cond.cleanup7.i ]
%1 = add i64 %arg.42, %arg.21
br label %for.cond9.preheader.i
; Level 1 latch: bump the outermost counter and leave the loop nest for
; l.exit when %arg.3 is true.
for.cond.cleanup3.i: ; preds = %for.cond.cleanup7.i
%indvars.iv.next74.i = add i64 %arg.21, 1
br i1 %arg.3, label %l.exit, label %for.cond1.preheader.i
; Loop level 3 header; %arg.74 is its counter and %2 adds it to the running
; sum.
for.cond9.preheader.i: ; preds = %for.cond.cleanup11.i, %for.cond5.preheader.i
%arg.74 = phi i64 [ 0, %for.cond5.preheader.i ], [ %indvars.iv.next66.i, %for.cond.cleanup11.i ]
%2 = add i64 %1, %arg.74
br label %vector.ph.i
; Level 2 latch: exits to the level 1 latch when %arg.5 is true.
for.cond.cleanup7.i: ; preds = %for.cond.cleanup11.i
%indvars.iv.next70.i = add i64 %arg.42, 1
br i1 %arg.5, label %for.cond.cleanup3.i, label %for.cond5.preheader.i
; Loop level 4 header; %3 is the sum of all four scalar counters, splatted
; across every lane as %broadcast.splat.i.
vector.ph.i: ; preds = %for.cond.cleanup15.i, %for.cond9.preheader.i
%arg.96 = phi i64 [ 0, %for.cond9.preheader.i ], [ %indvars.iv.next62.i, %for.cond.cleanup15.i ]
%3 = add i64 %2, %arg.96
%broadcast.splatinsert.i = insertelement <vscale x 4 x i64> zeroinitializer, i64 %3, i64 0
%broadcast.splat.i = shufflevector <vscale x 4 x i64> %broadcast.splatinsert.i, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i32> zeroinitializer
br label %vector.body.i
; Loop level 5 (innermost). Issues six masked scatters of zero to null-based
; addresses. Four use a constant all-false mask; the two that use %arg.6 as
; the mask are the ones at offsets +2 and +1 from %4. The expected assembly
; for this loop contains two masked vse32.v stores.
vector.body.i: ; preds = %vector.body.i, %vector.ph.i
%index.i = phi i64 [ 0, %vector.ph.i ], [ %index.next.i, %vector.body.i ]
%vec.ind.i = phi <vscale x 4 x i64> [ %0, %vector.ph.i ], [ %6, %vector.body.i ]
; %4 = per-lane induction value plus the splatted scalar counter sum.
%4 = add <vscale x 4 x i64> %vec.ind.i, %broadcast.splat.i
%5 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %4
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %5, i32 4, <vscale x 4 x i1> zeroinitializer)
; %6 doubles as the next value of %vec.ind.i (see the phi above).
%6 = add <vscale x 4 x i64> %vec.ind.i, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
%7 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %6
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %7, i32 4, <vscale x 4 x i1> zeroinitializer)
; Offset +2, masked by %arg.6.
%arg.100 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
%arg.101 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.100
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.101, i32 4, <vscale x 4 x i1> %arg.6)
%arg.102 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
%arg.103 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.102
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.103, i32 4, <vscale x 4 x i1> zeroinitializer)
; Offset +1, masked by %arg.6.
%arg.104 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
%arg.105 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.104
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.105, i32 4, <vscale x 4 x i1> %arg.6)
%arg.106 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 5, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
%arg.107 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.106
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.107, i32 4, <vscale x 4 x i1> zeroinitializer)
; The exit test compares the pre-increment %index.i against 0; %index.i is 0
; on entry from vector.ph.i, so the test is true on the first iteration.
%index.next.i = add i64 %index.i, 1
%arg.108 = icmp eq i64 %index.i, 0
br i1 %arg.108, label %for.cond.cleanup15.i, label %vector.body.i
; Level 3 latch: exits to the level 2 latch when %arg.3 is true.
for.cond.cleanup11.i: ; preds = %for.cond.cleanup15.i
%indvars.iv.next66.i = add i64 %arg.74, 1
br i1 %arg.3, label %for.cond.cleanup7.i, label %for.cond9.preheader.i
; Level 4 latch: exits to the level 3 latch when %arg.1 is true.
for.cond.cleanup15.i: ; preds = %vector.body.i
%indvars.iv.next62.i = add i64 %arg.96, 1
br i1 %arg.1, label %for.cond.cleanup11.i, label %vector.ph.i
; After the loop nest: indirect call through a null function pointer, then
; %arg.1 selects between the endless loop and the returning tail.
l.exit: ; preds = %for.cond.cleanup3.i
tail call void null()
br i1 %arg.1, label %for.body7.us.14, label %for.body7.us.19
; Self-loop with no exit edge.
for.body7.us.14: ; preds = %for.body7.us.14, %l.exit
br label %for.body7.us.14
; Returning tail: place %arg.10 in lane 1 of an otherwise-zero <32 x i32>,
; compare every lane against zero, pack the 32 results into an i32, and store
; whether any bit is set as an i8 to the null address.
; NOTE(review): the `ld a1, 112(sp)` / spill / reload to 0(sp) in the expected
; assembly presumably carries %arg.10 to this block -- confirm.
for.body7.us.19: ; preds = %l.exit
%arg.109 = insertelement <32 x i32> zeroinitializer, i32 %arg.10, i64 1
%8 = icmp ne <32 x i32> %arg.109, zeroinitializer
%9 = bitcast <32 x i1> %8 to i32
%op.rdx13 = icmp ne i32 %9, 0
%op.rdx = zext i1 %op.rdx13 to i8
store i8 %op.rdx, ptr null, align 1
ret i32 0
}