; llvm/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll

; Exercises the loop vectorizer's debug output about the cost of runtime
; memory checks on an AArch64 target. Each function below pins the reported
; total cost of the checks, whether that cost is expected to be hoisted out of
; an enclosing outer loop, and the minimum trip count reported as profitable.
; The expectations are matched against the -debug-only=loop-vectorize output
; (stderr is redirected into the pipe); no IR is printed (-disable-output).
;
; REQUIRES: asserts
; RUN: opt -p loop-vectorize -debug-only=loop-vectorize -S -disable-output < %s 2>&1 | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Single (non-nested) loop: for i in [0, n) do a[i + off] += b[i + off] on i8
; elements. There is no enclosing loop, so the directives below require that
; no hoisting message is printed before the total cost line.
define void @no_outer_loop(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %off, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'no_outer_loop'
; CHECK:      Calculating cost of runtime checks:
; CHECK-NOT:  We expect runtime memory checks to be hoisted out of the outer loop.
; CHECK:      Total cost of runtime checks: 4
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %loop

loop:
  ; Induction variable counting 0, 1, ..., n-1.
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Both arrays are indexed at the same offset i + off.
  %idx = add nuw nsw i64 %i, %off
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; Two-level loop nest over i8 elements: for each outer iteration r in [0, m)
; and inner iteration c in [0, n), a[r*n + c] += b[r*n + c]. The outer bound
; %m is a plain function argument, so the outer trip count is not a constant
; in this function.
define void @outer_no_tc(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_no_tc'
; CHECK:      Calculating cost of runtime checks:
; CHECK:      We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 3
; CHECK:      Total cost of runtime checks: 3
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %outer.header

outer.header:
  %row = phi i64 [ %row.next, %outer.latch ], [ 0, %entry ]
  ; Start index of the current row; invariant inside the inner loop.
  %row.base = mul nsw i64 %row, %n
  br label %inner.body

inner.body:
  %col = phi i64 [ 0, %outer.header ], [ %col.next, %inner.body ]
  %idx = add nuw nsw i64 %col, %row.base
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %outer.latch, label %inner.body

outer.latch:
  %row.next = add nuw nsw i64 %row, 1
  ; The outer loop ends when the counter reaches the runtime value %m.
  %outer.done = icmp eq i64 %row.next, %m
  br i1 %outer.done, label %return, label %outer.header

return:
  ret void
}


; Two-level loop nest over i8 elements: a[r*n + c] += b[r*n + c] for c in
; [0, n). The outer counter starts at 0, steps by 1 and exits when it equals
; the constant 3, i.e. the outer loop has a compile-time trip count of 3.
define void @outer_known_tc3(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3'
; CHECK:      Calculating cost of runtime checks:
; CHECK:      We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
; CHECK:      Total cost of runtime checks: 2
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %outer.header

outer.header:
  %row = phi i64 [ %row.next, %outer.latch ], [ 0, %entry ]
  ; Start index of the current row; invariant inside the inner loop.
  %row.base = mul nsw i64 %row, %n
  br label %inner.body

inner.body:
  %col = phi i64 [ 0, %outer.header ], [ %col.next, %inner.body ]
  %idx = add nuw nsw i64 %col, %row.base
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %outer.latch, label %inner.body

outer.latch:
  %row.next = add nuw nsw i64 %row, 1
  ; Constant outer bound: three outer iterations.
  %outer.done = icmp eq i64 %row.next, 3
  br i1 %outer.done, label %return, label %outer.header

return:
  ret void
}


; Two-level loop nest over i8 elements: a[r*n + c] += b[r*n + c] for c in
; [0, n). The outer counter starts at 0, steps by 1 and exits when it equals
; the constant 64, i.e. the outer loop has a compile-time trip count of 64.
define void @outer_known_tc64(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc64'
; CHECK:      Calculating cost of runtime checks:
; CHECK:      We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 1
; CHECK:      Total cost of runtime checks: 1
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %outer.header

outer.header:
  %row = phi i64 [ %row.next, %outer.latch ], [ 0, %entry ]
  ; Start index of the current row; invariant inside the inner loop.
  %row.base = mul nsw i64 %row, %n
  br label %inner.body

inner.body:
  %col = phi i64 [ 0, %outer.header ], [ %col.next, %inner.body ]
  %idx = add nuw nsw i64 %col, %row.base
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %outer.latch, label %inner.body

outer.latch:
  %row.next = add nuw nsw i64 %row, 1
  ; Constant outer bound: sixty-four outer iterations.
  %outer.done = icmp eq i64 %row.next, 64
  br i1 %outer.done, label %return, label %outer.header

return:
  ret void
}


; Two-level loop nest over i8 elements: a[r*n + c] += b[r*n + c] for c in
; [0, n). The outer bound %m is a runtime argument, but the outer latch branch
; carries profile metadata (!prof !0).
; NOTE(review): the expected reduction (6 to 2) presumably comes from an
; outer trip count of 3 estimated from those weights -- confirm.
define void @outer_pgo_3(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_pgo_3'
; CHECK:      Calculating cost of runtime checks:
; CHECK:      We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
; CHECK:      Total cost of runtime checks: 2
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %outer.header

outer.header:
  %row = phi i64 [ %row.next, %outer.latch ], [ 0, %entry ]
  ; Start index of the current row; invariant inside the inner loop.
  %row.base = mul nsw i64 %row, %n
  br label %inner.body

inner.body:
  %col = phi i64 [ 0, %outer.header ], [ %col.next, %inner.body ]
  %idx = add nuw nsw i64 %col, %row.base
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %outer.latch, label %inner.body

outer.latch:
  %row.next = add nuw nsw i64 %row, 1
  %outer.done = icmp eq i64 %row.next, %m
  ; Profile weights are attached to this exit-or-continue branch.
  br i1 %outer.done, label %return, label %outer.header, !prof !0

return:
  ret void
}


; Two-level loop nest over i8 elements: a[r*n + c] += b[r*n + c] for c in
; [0, n). The outer latch branch carries profile metadata (!prof !1) whose
; second weight is written as i32 -1. The directives below require that no
; cost reduction is reported and that the total stays at 6.
define void @outer_pgo_minus1(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_pgo_minus1'
; CHECK:      Calculating cost of runtime checks:
; CHECK-NOT:  We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced
; CHECK:      Total cost of runtime checks: 6
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %outer.header

outer.header:
  %row = phi i64 [ %row.next, %outer.latch ], [ 0, %entry ]
  ; Start index of the current row; invariant inside the inner loop.
  %row.base = mul nsw i64 %row, %n
  br label %inner.body

inner.body:
  %col = phi i64 [ 0, %outer.header ], [ %col.next, %inner.body ]
  %idx = add nuw nsw i64 %col, %row.base
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %outer.latch, label %inner.body

outer.latch:
  %row.next = add nuw nsw i64 %row, 1
  %outer.done = icmp eq i64 %row.next, %m
  ; Profile weights are attached to this exit-or-continue branch.
  br i1 %outer.done, label %return, label %outer.header, !prof !1

return:
  ret void
}


; Two-level loop nest over i32 elements (4-byte aligned accesses):
; dst[r*n + c] += src[r*n + c] for c in [0, n). The outer counter starts at 0,
; steps by 1 and exits when it equals the constant 3. Unlike the i8 variants
; in this file, the expected minimum profitable trip count here is 4.
define void @outer_known_tc3_full_range_checks(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3_full_range_checks'
; CHECK:      Calculating cost of runtime checks:
; CHECK:      We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
; CHECK:      Total cost of runtime checks: 2
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:4
entry:
  br label %outer.header

outer.header:
  %row = phi i64 [ 0, %entry ], [ %row.next, %outer.latch ]
  ; Start index of the current row; invariant inside the inner loop.
  %row.base = mul nsw i64 %row, %n
  br label %inner.body

inner.body:
  %col = phi i64 [ 0, %outer.header ], [ %col.next, %inner.body ]
  %idx = add nuw nsw i64 %col, %row.base
  %src.addr = getelementptr inbounds i32, ptr %src, i64 %idx
  %src.val = load i32, ptr %src.addr, align 4
  %dst.addr = getelementptr inbounds i32, ptr %dst, i64 %idx
  %dst.val = load i32, ptr %dst.addr, align 4
  %sum = add nsw i32 %dst.val, %src.val
  store i32 %sum, ptr %dst.addr, align 4
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %outer.latch, label %inner.body

outer.latch:
  %row.next = add nuw nsw i64 %row, 1
  ; Constant outer bound: three outer iterations.
  %outer.done = icmp eq i64 %row.next, 3
  br i1 %outer.done, label %return, label %outer.header

return:
  ret void
}


; Profile weights attached to the outer-loop latch branch of @outer_pgo_3.
; The first weight belongs to the branch's first successor (the loop exit),
; the second to its second successor (the back edge).
!0 = !{!"branch_weights", i32 10, i32 20}
; Profile weights attached to the outer-loop latch branch of
; @outer_pgo_minus1; the back-edge weight is written as i32 -1.
; NOTE(review): presumably meant to exercise an extreme (all-ones) weight
; value -- confirm how the estimator interprets it.
!1 = !{!"branch_weights", i32 1, i32 -1}