; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s
; The following tests check that VSETVLI insertion avoids emitting unneeded
; vsetvlis across basic blocks.
; Declarations of the RISC-V vector intrinsics called by the tests below:
; vsetvli, the f64/f32 arithmetic and splat operations, and the unit-stride
; stores.
declare i64 @llvm.riscv.vsetvli(i64, i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double>, double, i64)
declare <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32( <vscale x 2 x float>, float, i64)
declare void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double>, ptr nocapture, i64)
declare void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float>, ptr nocapture, i64)
; test1: the VL produced by a single vsetvli intrinsic in the entry block is
; consumed by a vfadd in if.then and a vfsub in if.else. The expected output
; has one vsetvli in the entry block and none in either branch.
define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: beqz a1, .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: # %if.else
; CHECK-NEXT: vfsub.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
; Operands (3, 0) show up as "e64, m1" in the expected vsetvli above.
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.else: ; preds = %entry
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
ret <vscale x 1 x double> %c.0
}
; Global used as the destination of the vse stores in test4 and test6.
@scratch = global i8 0, align 16
; test2: like test1, but the join block if.end also contains a vfmul that uses
; the same VL (%0). In the expected output the vfmul appears in both branches
; and the only vsetvli is the one in the entry block.
define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: beqz a1, .LBB1_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_2: # %if.else
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.else: ; preds = %entry
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
; Uses the entry-block VL again; no additional vsetvli is expected for it.
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
ret <vscale x 1 x double> %3
}
; test3: each branch issues its own vsetvli intrinsic with identical operands,
; and the vfmul in if.end takes its VL from a phi of the two results. The
; expected output has exactly one vsetvli per branch, each followed by the
; branch's arithmetic and the vfmul, with no extra vsetvli for the vfmul.
define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a1, .LBB2_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_2: # %if.else
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
entry:
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.else: ; preds = %entry
%2 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %2)
br label %if.end
if.end: ; preds = %if.else, %if.then
; VL for the vfmul is whichever branch's vsetvli result reached this block.
%vl.0 = phi i64 [ %0, %if.then], [ %2, %if.else ]
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %3, %if.else ]
%4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %vl.0)
ret <vscale x 1 x double> %4
}
; test4: no vsetvli intrinsic is used; every vector operation takes %avl
; directly. if.then works on f64 vectors and if.else on f32 vectors, each
; storing its result to @scratch, and if.end performs an f64 vfmul. The
; expected output has an e64 vsetvli in if.then, an e32 vsetvli in if.else,
; and a further e64 vsetvli in if.end, whose predecessors end with different
; element widths.
define <vscale x 1 x double> @test4(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %l, <vscale x 1 x double> %r) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a1, .LBB3_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: lui a1, %hi(.LCPI3_1)
; CHECK-NEXT: fld fa4, %lo(.LCPI3_1)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmv.v.f v10, fa5
; CHECK-NEXT: vfmv.v.f v11, fa4
; CHECK-NEXT: vfadd.vv v10, v10, v11
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse64.v v10, (a1)
; CHECK-NEXT: j .LBB3_3
; CHECK-NEXT: .LBB3_2: # %if.else
; CHECK-NEXT: lui a1, 260096
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: lui a1, 262144
; CHECK-NEXT: vmv.v.x v11, a1
; CHECK-NEXT: vfadd.vv v10, v10, v11
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: .LBB3_3: # %if.end
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmul.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
; f64 path: splat 1.0 and 2.0, add them, store the sum to @scratch.
%0 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 1.000000e+00, i64 %avl)
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 2.000000e+00, i64 %avl)
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 7, i64 %avl)
%3 = bitcast ptr @scratch to ptr
tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %2, ptr %3, i64 %avl)
br label %if.end
if.else: ; preds = %entry
; f32 path: same computation on <vscale x 2 x float>.
%4 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 1.000000e+00, i64 %avl)
%5 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 2.000000e+00, i64 %avl)
%6 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %4, <vscale x 2 x float> %5, i64 7, i64 %avl)
%7 = bitcast ptr @scratch to ptr
tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %6, ptr %7, i64 %avl)
br label %if.end
if.end: ; preds = %if.else, %if.then
%8 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %l, <vscale x 1 x double> %r, i64 7, i64 %avl)
ret <vscale x 1 x double> %8
}
; test5: two back-to-back diamonds, selected by bit 0 and bit 1 of %cond. All
; four vector operations use the VL (%0) from the single vsetvli intrinsic in
; the entry block. The expected output contains only that one vsetvli.
define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: bnez a2, .LBB4_3
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: beqz a1, .LBB4_4
; CHECK-NEXT: .LBB4_2: # %if.then4
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB4_3: # %if.then
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: bnez a1, .LBB4_2
; CHECK-NEXT: .LBB4_4: # %if.else5
; CHECK-NEXT: vfmul.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%conv = zext i8 %cond to i32
; First diamond is selected by bit 0 of %cond.
%and = and i32 %conv, 1
%tobool = icmp eq i32 %and, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.else: ; preds = %entry
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
; Second diamond is selected by bit 1 of %cond.
%and2 = and i32 %conv, 2
%tobool3 = icmp eq i32 %and2, 0
br i1 %tobool3, label %if.else5, label %if.then4
if.then4: ; preds = %if.end
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
br label %if.end6
if.else5: ; preds = %if.end
; Same multiply as if.then4 but with the operand order swapped.
%4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %c.0, i64 7, i64 %0)
br label %if.end6
if.end6: ; preds = %if.else5, %if.then4
%c.1 = phi <vscale x 1 x double> [ %3, %if.then4 ], [ %4, %if.else5 ]
ret <vscale x 1 x double> %c.1
}
; test6: the explicit vsetvli in if.then4 repeats the operands of the one in
; the entry block and is therefore redundant with it. The expected output below
; contains no vsetvli in %if.then4, so the redundant instruction is removed.
; NOTE(review): this comment used to be a FIXME stating that explicit vsetvli
; instructions could not be removed; the assertions no longer reflect that.
; if.else5 switches to e32 via its own vsetvli, so if.end10 needs a vsetvli to
; get back to e64 for the final vfmul.
define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: bnez a2, .LBB5_3
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v8, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: beqz a1, .LBB5_4
; CHECK-NEXT: .LBB5_2: # %if.then4
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: lui a1, %hi(.LCPI5_1)
; CHECK-NEXT: fld fa4, %lo(.LCPI5_1)(a1)
; CHECK-NEXT: vfmv.v.f v9, fa5
; CHECK-NEXT: vfmv.v.f v10, fa4
; CHECK-NEXT: vfadd.vv v9, v9, v10
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse64.v v9, (a1)
; CHECK-NEXT: j .LBB5_5
; CHECK-NEXT: .LBB5_3: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: bnez a1, .LBB5_2
; CHECK-NEXT: .LBB5_4: # %if.else5
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: lui a1, 260096
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: lui a1, 262144
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vfadd.vv v9, v9, v10
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse32.v v9, (a1)
; CHECK-NEXT: .LBB5_5: # %if.end10
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmul.vv v8, v8, v8
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%conv = zext i8 %cond to i32
%and = and i32 %conv, 1
%tobool = icmp eq i32 %and, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.else: ; preds = %entry
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
%and2 = and i32 %conv, 2
%tobool3 = icmp eq i32 %and2, 0
br i1 %tobool3, label %if.else5, label %if.then4
if.then4: ; preds = %if.end
; Same operands as the vsetvli in the entry block.
%3 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%4 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 1.000000e+00, i64 %3)
%5 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 2.000000e+00, i64 %3)
%6 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %4, <vscale x 1 x double> %5, i64 7, i64 %3)
%7 = bitcast ptr @scratch to ptr
tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %6, ptr %7, i64 %3)
br label %if.end10
if.else5: ; preds = %if.end
; SEW operand 2 here shows up as "e32" in the expected vsetvli for this block.
%8 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0)
%9 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 1.000000e+00, i64 %8)
%10 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32( <vscale x 2 x float> undef, float 2.000000e+00, i64 %8)
%11 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %9, <vscale x 2 x float> %10, i64 7, i64 %8)
%12 = bitcast ptr @scratch to ptr
tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %11, ptr %12, i64 %8)
br label %if.end10
if.end10: ; preds = %if.else5, %if.then4
; Uses the VL from the entry-block vsetvli (%0), not %3 or %8.
%13 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %c.0, i64 7, i64 %0)
ret <vscale x 1 x double> %13
}
; External function declared without a body here; test8 and test9 call it.
declare void @foo()
; Similar to test1, but contains a call to @foo to act as barrier to analyzing
; VL/VTYPE.
; The call to @foo sits in if.else ahead of the vfsub. In the expected output
; the AVL is kept in s0 across the call and a vsetvli using s0 is emitted
; after the call, before the vfsub; the if.then path needs no extra vsetvli.
define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB6_2: # %if.else
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call foo
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, ma
; CHECK-NEXT: vfsub.vv v8, v9, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.else: ; preds = %entry
; The call comes before the vector operation that consumes %0.
call void @foo()
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
ret <vscale x 1 x double> %c.0
}
; Similar to test2, but contains a call to @foo to act as barrier to analyzing
; VL/VTYPE.
; The call to @foo sits in if.then after the vfadd, so only one predecessor of
; if.end contains a call. In the expected output the AVL is kept in s0 and a
; vsetvli using s0 is emitted in if.end before the vfmul.
define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: beqz a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call foo
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: j .LBB7_3
; CHECK-NEXT: .LBB7_2: # %if.else
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: .LBB7_3: # %if.end
; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, ma
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
; The call comes after this block's vector operation, right before the join.
call void @foo()
br label %if.end
if.else: ; preds = %entry
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
ret <vscale x 1 x double> %3
}
; saxpy_vec: strip-mined loop computing y[i] += a * x[i]. A vsetvli intrinsic
; with operands (2, 3) produces the VL before the loop and again at the bottom
; of every iteration; both appear as "e32, m8" in the expected output. Inside
; the loop body the expected output only has a "vsetvli zero, zero" that
; switches to tu before the vfmacc, plus the VL-producing vsetvli at the
; bottom; no vsetvli is expected ahead of the loads.
define void @saxpy_vec(i64 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) {
; CHECK-LABEL: saxpy_vec:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT: beqz a3, .LBB8_2
; CHECK-NEXT: .LBB8_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: vle32.v v16, (a2)
; CHECK-NEXT: slli a4, a3, 2
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma
; CHECK-NEXT: vfmacc.vf v16, fa0, v8
; CHECK-NEXT: vse32.v v16, (a2)
; CHECK-NEXT: sub a0, a0, a3
; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: bnez a3, .LBB8_1
; CHECK-NEXT: .LBB8_2: # %for.end
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3)
; Skip the loop entirely when the first VL is zero.
%cmp.not13 = icmp eq i64 %0, 0
br i1 %cmp.not13, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; %1 is the VL for this iteration: %0 on entry, %7 on the back edge.
%1 = phi i64 [ %7, %for.body ], [ %0, %entry ]
%n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ]
%x.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
%y.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%2 = bitcast ptr %x.addr.015 to ptr
%3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %2, i64 %1)
%add.ptr = getelementptr inbounds float, ptr %x.addr.015, i64 %1
%4 = bitcast ptr %y.addr.014 to ptr
%5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %4, i64 %1)
; Final operand 0 is presumably the policy operand (TODO confirm); the expected
; output switches to "tu, ma" right before the vfmacc.
%6 = tail call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> %5, float %a, <vscale x 16 x float> %3, i64 7, i64 %1, i64 0)
tail call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %6, ptr %4, i64 %1)
%add.ptr1 = getelementptr inbounds float, ptr %y.addr.014, i64 %1
; Remaining element count, then the VL for the next iteration.
%sub = sub i64 %n.addr.016, %1
%7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 2, i64 3)
%cmp.not = icmp eq i64 %7, 0
br i1 %cmp.not, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; saxpy_vec_demanded_fields: same loop as saxpy_vec, except that the vsetvli
; intrinsic at the bottom of the loop uses operands (1, 2), which appear as
; "e16, m4" in the expected output instead of "e32, m8". Both settings have
; the same SEW/LMUL ratio (32/8 == 16/4). The expected output keeps the
; e16, m4 vsetvli at the bottom of the loop and has an additional
; "vsetvli zero, a3, e32, m8" at the top of the loop body before the loads.
define void @saxpy_vec_demanded_fields(i64 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) {
; CHECK-LABEL: saxpy_vec_demanded_fields:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT: beqz a3, .LBB9_2
; CHECK-NEXT: .LBB9_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: vle32.v v16, (a2)
; CHECK-NEXT: slli a4, a3, 2
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma
; CHECK-NEXT: vfmacc.vf v16, fa0, v8
; CHECK-NEXT: vse32.v v16, (a2)
; CHECK-NEXT: sub a0, a0, a3
; CHECK-NEXT: vsetvli a3, a0, e16, m4, ta, ma
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: bnez a3, .LBB9_1
; CHECK-NEXT: .LBB9_2: # %for.end
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3)
%cmp.not13 = icmp eq i64 %0, 0
br i1 %cmp.not13, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; %1 is the VL for this iteration: %0 on entry, %7 on the back edge.
%1 = phi i64 [ %7, %for.body ], [ %0, %entry ]
%n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ]
%x.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
%y.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
%2 = bitcast ptr %x.addr.015 to ptr
%3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %2, i64 %1)
%add.ptr = getelementptr inbounds float, ptr %x.addr.015, i64 %1
%4 = bitcast ptr %y.addr.014 to ptr
%5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %4, i64 %1)
%6 = tail call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> %5, float %a, <vscale x 16 x float> %3, i64 7, i64 %1, i64 0)
tail call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %6, ptr %4, i64 %1)
%add.ptr1 = getelementptr inbounds float, ptr %y.addr.014, i64 %1
%sub = sub i64 %n.addr.016, %1
; Differs from saxpy_vec here: operands (1, 2) instead of (2, 3).
%7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 1, i64 2)
%cmp.not = icmp eq i64 %7, 0
br i1 %cmp.not, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Declarations of the .i64-suffixed intrinsics used by the saxpy_vec tests
; above.
declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>, ptr nocapture, i64)
declare <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float>, float, <vscale x 16 x float>, i64, i64, i64)
declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, ptr nocapture, i64)
; We need a vsetvli in the last block because the predecessors have different
; VTYPEs. The AVL is the same and the SEW/LMUL ratio implies the same VLMAX so
; we don't need to read AVL and can keep VL unchanged.
; All vector operations here take %vl directly. The entry block loads i32
; elements; the "if" block loads i16 elements and widens them to i32. The
; expected output uses the "vsetvli zero, zero" form both for the e16, mf2
; switch in "if" and for the switch back to e32, m1 in if.end.
define <vscale x 2 x i32> @test_vsetvli_x0_x0(ptr %x, ptr %y, <vscale x 2 x i32> %z, i64 %vl, i1 %cond) nounwind {
; CHECK-LABEL: test_vsetvli_x0_x0:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: andi a3, a3, 1
; CHECK-NEXT: beqz a3, .LBB10_2
; CHECK-NEXT: # %bb.1: # %if
; CHECK-NEXT: vle16.v v10, (a1)
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwcvt.x.x.v v8, v10
; CHECK-NEXT: .LBB10_2: # %if.end
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 %vl)
br i1 %cond, label %if, label %if.end
if:
%b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 %vl)
; Widening add of scalar 0; the expected output shows it as vwcvt.x.x.v.
%c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 %vl)
br label %if.end
if.end:
%d = phi <vscale x 2 x i32> [ %z, %entry ], [ %c, %if ]
%e = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
ret <vscale x 2 x i32> %e
}
; Declarations of the integer load, widening-add and add intrinsics used by
; the test_vsetvli_x0_x0 tests.
declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>, ptr, i64)
declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>, ptr, i64)
declare <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i16>, i16, i64)
declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64)
; We can use X0, X0 vsetvli in if2 and if2.end. The merge point at if.end will
; see two different vtypes with the same SEW/LMUL ratio. At if2.end we will only
; know the SEW/LMUL ratio for the if.end predecessor and the full vtype for
; the if2 predecessor. This makes sure we can merge a SEW/LMUL predecessor with
; a predecessor we know the vtype for.
; Two consecutive conditional blocks ("if" and "if2"), each loading i16
; elements and accumulating them into an i32 vector with vwadd.w, followed by
; an i32 vadd in if2.end. Every vector operation takes %vl directly. After the
; initial vsetvli in the entry block, each vsetvli in the expected output uses
; the "vsetvli zero, zero" form.
define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(ptr %x, ptr %y, ptr %z, i64 %vl, i1 %cond, i1 %cond2, <vscale x 2 x i32> %w) nounwind {
; CHECK-LABEL: test_vsetvli_x0_x0_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: andi a4, a4, 1
; CHECK-NEXT: beqz a4, .LBB11_2
; CHECK-NEXT: # %bb.1: # %if
; CHECK-NEXT: vle16.v v10, (a1)
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwadd.wv v9, v9, v10
; CHECK-NEXT: .LBB11_2: # %if.end
; CHECK-NEXT: andi a5, a5, 1
; CHECK-NEXT: beqz a5, .LBB11_4
; CHECK-NEXT: # %bb.3: # %if2
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a2)
; CHECK-NEXT: vwadd.wv v9, v9, v10
; CHECK-NEXT: .LBB11_4: # %if2.end
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 %vl)
br i1 %cond, label %if, label %if.end
if:
%b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 %vl)
%c = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i16> %b, i64 %vl)
br label %if.end
if.end:
; No vector operation in this block; it only merges and branches again.
%d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
br i1 %cond2, label %if2, label %if2.end
if2:
%e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %z, i64 %vl)
%f = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i32> %d, <vscale x 2 x i16> %e, i64 %vl)
br label %if2.end
if2.end:
%g = phi <vscale x 2 x i32> [ %d, %if.end ], [ %f, %if2 ]
%h = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %g, <vscale x 2 x i32> %w, i64 %vl)
ret <vscale x 2 x i32> %h
}
; Widening add taking an i32 vector and an i16 vector; used by
; test_vsetvli_x0_x0_2.
declare <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64)
; We should only need 1 vsetvli for this code.
; vlmax: the VL comes from a vsetvlimax intrinsic in the entry block and is
; used by two loads, a vfadd and a store inside the loop, as well as for the
; induction step. The expected output has a single vsetvli (AVL operand
; "zero") in the loop preheader and none inside the loop.
define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
; CHECK-LABEL: vlmax:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: blez a0, .LBB12_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: li a4, 0
; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma
; CHECK-NEXT: slli a5, a6, 3
; CHECK-NEXT: .LBB12_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle64.v v8, (a2)
; CHECK-NEXT: vle64.v v9, (a3)
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: add a4, a4, a6
; CHECK-NEXT: add a1, a1, a5
; CHECK-NEXT: add a3, a3, a5
; CHECK-NEXT: add a2, a2, a5
; CHECK-NEXT: blt a4, a0, .LBB12_2
; CHECK-NEXT: .LBB12_3: # %for.end
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
%cmp13 = icmp sgt i64 %N, 0
br i1 %cmp13, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds double, ptr %a, i64 %i.014
%1 = bitcast ptr %arrayidx to ptr
%2 = tail call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, ptr %1, i64 %0)
%arrayidx1 = getelementptr inbounds double, ptr %b, i64 %i.014
%3 = bitcast ptr %arrayidx1 to ptr
%4 = tail call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, ptr %3, i64 %0)
%5 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> undef, <vscale x 1 x double> %2, <vscale x 1 x double> %4, i64 7, i64 %0)
%arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
%6 = bitcast ptr %arrayidx2 to ptr
tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> %5, ptr %6, i64 %0)
; The induction variable advances by the VL each iteration.
%add = add nuw nsw i64 %i.014, %0
%cmp = icmp slt i64 %add, %N
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
; A single vector store in the loop with VL controlled by VLMAX
; vector_init_vlmax: the loop stores a zero vector to %c, advancing by the VL
; returned from vsetvlimax in the entry block. The expected output places the
; vsetvli and the zero splat (vmv.v.i) in the preheader, leaving only the
; store and address/index updates in the loop.
define void @vector_init_vlmax(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vlmax:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: blez a0, .LBB13_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
; CHECK-NEXT: slli a4, a3, 3
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB13_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: blt a2, a0, .LBB13_2
; CHECK-NEXT: .LBB13_3: # %for.end
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
%cmp13 = icmp sgt i64 %N, 0
br i1 %cmp13, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
%arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
%addr = bitcast ptr %arrayidx2 to ptr
tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
%add = add nuw nsw i64 %i.014, %0
%cmp = icmp slt i64 %add, %N
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
; Same as above, but VL comes from user provided AVL value
; vector_init_vsetvli_N: same zero-store loop, but the VL comes from a vsetvli
; intrinsic whose AVL is %N. The expected output has a single
; "vsetvli a3, a0" in the preheader and none inside the loop.
define void @vector_init_vsetvli_N(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_N:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: blez a0, .LBB14_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetvli a3, a0, e64, m1, ta, ma
; CHECK-NEXT: slli a4, a3, 3
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB14_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: blt a2, a0, .LBB14_2
; CHECK-NEXT: .LBB14_3: # %for.end
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %N, i64 3, i64 0)
%cmp13 = icmp sgt i64 %N, 0
br i1 %cmp13, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
%arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
%addr = bitcast ptr %arrayidx2 to ptr
tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
%add = add nuw nsw i64 %i.014, %0
%cmp = icmp slt i64 %add, %N
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
; Same as above, but VL is a hard coded constant (in the preheader)
; vector_init_vsetvli_fv: same zero-store loop with no guard around it; the
; entry block branches straight into for.body. The vsetvli intrinsic is given
; the constant AVL 4, which the expected output emits as the immediate form
; "vsetivli a3, 4" ahead of the loop, with no vsetvli inside the loop.
define void @vector_init_vsetvli_fv(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_fv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli a3, 4, e64, m1, ta, ma
; CHECK-NEXT: slli a4, a3, 3
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB15_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: blt a2, a0, .LBB15_1
; CHECK-NEXT: # %bb.2: # %for.end
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 4, i64 3, i64 0)
br label %for.body
for.body: ; preds = %entry, %for.body
%i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
%arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
%addr = bitcast ptr %arrayidx2 to ptr
; The store and the induction step both use the vsetvli result %0.
tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
%add = add nuw nsw i64 %i.014, %0
%cmp = icmp slt i64 %add, %N
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
; Same as above, but the result of the vsetvli in the preheader isn't used, and
; the constant (4) is repeated in the loop as both the store's AVL and the
; index increment. The expected assembly still has one vsetivli before the
; loop and none inside it.
define void @vector_init_vsetvli_fv2(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_fv2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB16_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: addi a1, a1, 32
; CHECK-NEXT: blt a2, a0, .LBB16_1
; CHECK-NEXT: # %bb.2: # %for.end
; CHECK-NEXT: ret
entry:
; Result intentionally discarded.
tail call i64 @llvm.riscv.vsetvli(i64 4, i64 3, i64 0)
br label %for.body
for.body: ; preds = %entry, %for.body
%i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
%arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
; No-op pointer cast (ptr to ptr).
%addr = bitcast ptr %arrayidx2 to ptr
; Store zeros at %c[%i.014] with a literal AVL of 4.
tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 4)
%add = add nuw nsw i64 %i.014, 4
%cmp = icmp slt i64 %add, %N
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
; Same as above, but the AVL is only specified on the store intrinsic; there is
; no explicit vsetvli intrinsic call anywhere in the function.
; This case requires some form of hoisting or PRE: the expected assembly places
; the vsetivli before the loop rather than next to the store.
define void @vector_init_vsetvli_fv3(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_fv3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB17_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: addi a1, a1, 32
; CHECK-NEXT: blt a2, a0, .LBB17_1
; CHECK-NEXT: # %bb.2: # %for.end
; CHECK-NEXT: ret
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
%arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
; No-op pointer cast (ptr to ptr).
%addr = bitcast ptr %arrayidx2 to ptr
; The only VL-consuming operation: store zeros with a literal AVL of 4.
tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 4)
%add = add nuw nsw i64 %i.014, 4
%cmp = icmp slt i64 %add, %N
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
; Demonstrates a case where mutation in phase3 is problematic. We mutate the
; vsetvli without considering that it changes the compatibility result of the
; vadd in the second block.
; The vsetvli intrinsic is called with SEW operand 1 and LMUL operand 0, while
; the expected assembly shows the emitted instruction as e32, m2.
define <vscale x 4 x i32> @cross_block_mutate(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
; CHECK-LABEL: cross_block_mutate:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli a0, 6, e32, m2, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
<vscale x 4 x i1> %mask) {
entry:
; %vl is consumed twice: as the scalar written into element 0 of %a below, and
; as the VL operand of the masked vadd in %fallthrough.
%vl = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 1, i64 0)
%vl.trunc = trunc i64 %vl to i32
%a.mod = insertelement <vscale x 4 x i32> %a, i32 %vl.trunc, i32 0
br label %fallthrough
fallthrough:
; Second block: masked vadd whose VL is the vsetvli result from %entry.
%res = call <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(
<vscale x 4 x i32> undef, <vscale x 4 x i32> %a.mod,
<vscale x 4 x i32> %b, <vscale x 4 x i1> %mask, i64 %vl, i64 0)
ret <vscale x 4 x i32> %res
}
; Two vadds on <vscale x 2 x i32> share the VL produced by a vsetvlimax in
; %entry. The conditional %if block only issues another vsetvlimax with
; different operands and discards its result. In the expected assembly no
; branch remains and both vadds run under a single e32, m1 vsetvli.
define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i1 %cond) nounwind {
; CHECK-LABEL: pre_lmul:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
; VLMAX request; emitted as e64, m1 in the expected assembly.
%vl = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
%a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl)
br i1 %cond, label %if, label %if.end
if:
; Deliberately change vtype - this could be an unknown call, but the broader
; code quality is distractingly bad
tail call i64 @llvm.riscv.vsetvlimax.i64(i64 2, i64 1)
br label %if.end
if.end:
; Second vadd reuses %vl from %entry.
%b = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %y, i64 %vl)
ret <vscale x 2 x i32> %b
}
; A scalable fadd result is stored through %p1 in %entry, and a
; <vscale x 1 x float> value is conditionally stored through %p2 in %if.then.
; The expected assembly has one vsetvli (e64, m1) in %entry and no additional
; vsetvli before the vse32.v in %if.then.
define <vscale x 1 x double> @compat_store_consistency(i1 %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b, ptr %p1, <vscale x 1 x float> %c, ptr %p2) {
; CHECK-LABEL: compat_store_consistency:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: beqz a0, .LBB20_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vse32.v v10, (a2)
; CHECK-NEXT: .LBB20_2: # %if.end
; CHECK-NEXT: ret
entry:
%res = fadd <vscale x 1 x double> %a, %b
store <vscale x 1 x double> %res, ptr %p1
br i1 %cond, label %if.then, label %if.end
if.then: ; preds = %entry
; Store of a different element type (float) than the fadd above (double).
store <vscale x 1 x float> %c, ptr %p2
br label %if.end
if.end: ; preds = %entry, %if.then
ret <vscale x 1 x double> %res
}
; The next two tests (which are the same except for swapped block order) make
; sure that the demanded reasoning around vmv.s.x correctly handles a forward
; state with only a valid SEWLMULRatio. We previously had a crash bug in this
; case.
define <vscale x 2 x i32> @test_ratio_only_vmv_s_x(ptr %x, ptr %y, i1 %cond) nounwind {
; CHECK-LABEL: test_ratio_only_vmv_s_x:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a2, 1
; CHECK-NEXT: beqz a2, .LBB21_2
; CHECK-NEXT: # %bb.1: # %if
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a1)
; CHECK-NEXT: vwcvt.x.x.v v8, v9
; CHECK-NEXT: j .LBB21_3
; CHECK-NEXT: .LBB21_2:
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: .LBB21_3: # %if.end
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: ret
entry:
; 32-bit element load from %x with VL = 2.
%a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 2)
br i1 %cond, label %if, label %if.end
if:
; 16-bit element load from %y, widened to 32 bits by a vwadd with scalar 0
; (emitted as vwcvt.x.x.v in the expected assembly).
%b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 2)
%c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 2)
br label %if.end
if.end:
; The two incoming paths end under different vtypes (e32/m1 vs. e16/mf2 in the
; expected assembly); the element-0 insert below becomes the vmv.s.x.
%d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
%e = insertelement <vscale x 2 x i32> %d, i32 0, i32 0
ret <vscale x 2 x i32> %e
}
; Variant of test_ratio_only_vmv_s_x with the block contents swapped: the
; 16-bit load and widening vwadd are in %entry, and the 32-bit load is in %if.
define <vscale x 2 x i32> @test_ratio_only_vmv_s_x2(ptr %x, ptr %y, i1 %cond) nounwind {
; CHECK-LABEL: test_ratio_only_vmv_s_x2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a2, 1
; CHECK-NEXT: beqz a2, .LBB22_2
; CHECK-NEXT: # %bb.1: # %if
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: j .LBB22_3
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a1)
; CHECK-NEXT: vwcvt.x.x.v v8, v9
; CHECK-NEXT: .LBB22_3: # %if.end
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: ret
entry:
; 16-bit element load from %y, widened to 32 bits by a vwadd with scalar 0.
%b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 2)
%c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 2)
br i1 %cond, label %if, label %if.end
if:
; 32-bit element load from %x with VL = 2.
%a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 2)
br label %if.end
if.end:
; Merge both paths, then write 0 into element 0 (the vmv.s.x in the expected
; assembly).
%d = phi <vscale x 2 x i32> [ %a, %if ], [ %c, %entry ]
%e = insertelement <vscale x 2 x i32> %d, i32 0, i32 0
ret <vscale x 2 x i32> %e
}
; This case demonstrates a PRE case where the first instruction in the block
; doesn't require a state transition. In the expected assembly the single
; vsetivli sits before the loop label, and the vle8.v at the top of the loop
; body executes under that e32, mf2 configuration.
define void @pre_over_vle(ptr %A) {
; CHECK-LABEL: pre_over_vle:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi a1, a0, 800
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB23_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsext.vf4 v9, v8
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: addi a0, a0, 8
; CHECK-NEXT: bne a0, a1, .LBB23_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: ret
entry:
br label %vector.body
vector.body:
; Fixed-length vectors: load <2 x i8>, sign-extend to <2 x i32>, and store the
; result back to the same address. The loop runs for %iv = 0..99.
%iv = phi i64 [ 0, %entry], [%iv.next, %vector.body]
%addr = getelementptr inbounds <2 x i32>, ptr %A, i64 %iv
%v = load <2 x i8>, ptr %addr
%v2 = sext <2 x i8> %v to <2 x i32>
store <2 x i32> %v2, ptr %addr
%iv.next = add i64 %iv, 1
%cmp = icmp ne i64 %iv.next, 100
br i1 %cmp, label %vector.body, label %exit
exit:
ret void
}
; Declarations of RISC-V vector intrinsics referenced by tests in this file.
; vsetvlimax takes two i64 operands and returns the resulting VL as an i64.
declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
; Unit-stride load, floating-point add, and unit-stride store on
; <vscale x 1 x double>; the trailing i64 operand of each is the VL.
declare <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double>, ptr nocapture, i64)
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
declare void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double>, ptr nocapture, i64)
; Masked vadd on <vscale x 4 x i32>: three vector operands, an i1 mask vector,
; then two i64 operands (the first is used as the VL by cross_block_mutate).
declare <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(
<vscale x 4 x i32>,
<vscale x 4 x i32>,
<vscale x 4 x i32>,
<vscale x 4 x i1>,
i64,
i64);
; Normally a pseudo's AVL is already live in its block, so it will already be
; live where we're inserting the vsetvli, before the pseudo. In some cases the
; AVL can be from a predecessor block, so make sure we extend its live range
; across blocks.
define <vscale x 2 x i32> @cross_block_avl_extend(i64 %avl, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
; CHECK-LABEL: cross_block_avl_extend:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v9, v8, v9
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
; Get the output vl from a vsetvli
%vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 0)
; Force a vsetvli toggle so we need to insert a new vsetvli in exit
%d = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b, i64 1)
br label %exit
exit:
; The use of the vl from the vsetvli will be replaced with its %avl because
; VLMAX is the same. So %avl, which was previously only live in %entry, will
; need to be extended down to %exit.
%c = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
ret <vscale x 2 x i32> %c
}
; Variant of the cross-block AVL case where the AVL (%add) is defined in %bar,
; which is laid out textually after its user block %foo. %foo branches only to
; itself, so it is an infinite loop entered from %bar.
define void @cross_block_avl_extend_backwards(i1 %cond, <vscale x 8 x i8> %v, ptr %p, i64 %avl) {
; CHECK-LABEL: cross_block_avl_extend_backwards:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: beqz a0, .LBB25_2
; CHECK-NEXT: # %bb.1: # %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB25_2: # %bar
; CHECK-NEXT: addi a2, a2, 1
; CHECK-NEXT: .LBB25_3: # %foo
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: j .LBB25_3
entry:
br i1 %cond, label %exit, label %bar
foo:
; Force a vl toggle
call void @llvm.riscv.vse.nxv8i8.i64(<vscale x 8 x i8> %v, ptr %p, i64 1)
; %add's LiveRange needs to be extended backwards to here.
call void @llvm.riscv.vse.nxv8i8.i64(<vscale x 8 x i8> %v, ptr %p, i64 %add)
br label %foo
exit:
ret void
bar:
; Defines the AVL consumed by the second store in %foo.
%add = add i64 %avl, 1
br label %foo
}
; The VL operand of the vmv.v.x in %exit is a phi of two vsetvli results that
; use the same SEW/LMUL operands but different AVLs (%a in %foo, %b in %bar).
; In the expected assembly each predecessor keeps its own vsetvli, no vsetvli
; is emitted before the vmv.v.i in %exit, and the following store with AVL 1
; gets a separate vsetivli.
define void @vlmax_avl_phi(i1 %cmp, ptr %p, i64 %a, i64 %b) {
; CHECK-LABEL: vlmax_avl_phi:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: beqz a0, .LBB26_2
; CHECK-NEXT: # %bb.1: # %foo
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: j .LBB26_3
; CHECK-NEXT: .LBB26_2: # %bar
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; CHECK-NEXT: .LBB26_3: # %exit
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
br i1 %cmp, label %foo, label %bar
foo:
%vl.foo = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a, i64 0, i64 0)
br label %exit
bar:
%vl.bar = tail call i64 @llvm.riscv.vsetvli.i64(i64 %b, i64 0, i64 0)
br label %exit
exit:
; VL comes from whichever predecessor's vsetvli executed.
%phivl = phi i64 [ %vl.foo, %foo ], [ %vl.bar, %bar ]
%1 = tail call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 0, i64 %phivl)
; Store with a constant AVL of 1, which differs from the phi'd VL above.
call void @llvm.riscv.vse.nxv8i8(<vscale x 8 x i8> %1, ptr %p, i64 1)
ret void
}
; Check that if we forward an AVL whose value is clobbered in its LiveInterval
; we emit a copy instead. In the expected assembly the copy is the
; `mv a2, a0` in %entry, and a2 is the AVL of the final vsetvli.
define <vscale x 4 x i32> @clobbered_forwarded_avl(i64 %n, <vscale x 4 x i32> %v, i1 %cmp) {
; CHECK-LABEL: clobbered_forwarded_avl:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: .LBB27_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: bnez a1, .LBB27_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vadd.vv v10, v8, v8
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vadd.vv v8, v10, v8
; CHECK-NEXT: ret
entry:
; %0 is the VL later used by the second vadd in %for.cond.cleanup.
%0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 1)
br label %for.body
for.body:
; Use %n in a PHI here so its virtual register is assigned to a second time here.
%1 = phi i64 [ %3, %for.body ], [ %n, %entry ]
; Result (%2) is unused.
%2 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %1, i64 0, i64 0)
%3 = add i64 %1, 1
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup:
; NOTE(review): the intrinsic name suffix says nxv2f32 but the operands are
; <vscale x 4 x i32>; presumably this relies on LLVM remangling the intrinsic
; name on load - confirm before renaming.
; The AVL of -1 is emitted as a VLMAX vsetvli (AVL register `zero`) in the
; expected assembly.
%4 = tail call <vscale x 4 x i32> @llvm.riscv.vadd.nxv2f32.nxv2f32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32> %v, <vscale x 4 x i32> %v, i64 -1)
; VL toggle needed here: If the %n AVL was forwarded here we wouldn't be able
; to extend its LiveInterval because it would clobber the assignment at %1.
%5 = tail call <vscale x 4 x i32> @llvm.riscv.vadd.nxv2f32.nxv2f32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32> %4, <vscale x 4 x i32> %v, i64 %0)
ret <vscale x 4 x i32> %5
}