; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
; One dimensional loop with load that can be hoisted outside of loop
; for (int i = 0; i < N; ++i)
; if (!memcmp(a[i], b, 4))
; sum += 1;
;
define i64 @one_dimensional(ptr %a, ptr %b, i64 %N) {
; CHECK-LABEL: one_dimensional:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB0_1: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr x10, [x0], #8
; CHECK-NEXT: ldr w10, [x10]
; CHECK-NEXT: cmp w10, w9
; CHECK-NEXT: cinc x8, x8, eq
; CHECK-NEXT: subs x2, x2, #1
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %for.exit
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
; Counts the elements a[i], for i in [0, N), whose first 4 bytes equal the 4
; bytes at %b, and returns that count.
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
; %i.06 is the induction variable; %sum.05 is the running count of matches.
%i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ]
; Fetch the pointer stored in a[i].
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06
%0 = load ptr, ptr %arrayidx, align 8
; Constant-size (4 byte) bcmp against the loop-invariant %b. In the expected
; assembly above there is no call: both sides become 32-bit loads, and the load
; from %b (x1) is emitted once in the entry block, ahead of the loop.
%bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4)
; bcmp returns 0 on equality; add 1 to the count in that case.
%tobool = icmp eq i32 %bcmp, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.05, %add
%inc = add nuw i64 %i.06, 1
%exitcond = icmp eq i64 %inc, %N
br i1 %exitcond, label %for.exit, label %for.body
for.exit: ; preds = %for.body
ret i64 %spec.select
}
; Same but loop is two dimensional. Load is hoisted outside of both loops
; for (int i = 0; i < N; ++i)
; for (int j = 0; j < M; ++j)
; if (!memcmp(a[i][j], b, 4))
; sum += 1;
;
define i64 @two_dimensional(ptr %a, ptr %b, i64 %N, i64 %M) {
; CHECK-LABEL: two_dimensional:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w10, [x1]
; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB1_1: // %for.cond1.preheader
; CHECK-NEXT: // =>This Loop Header: Depth=1
; CHECK-NEXT: // Child Loop BB1_2 Depth 2
; CHECK-NEXT: ldr x11, [x0, x9, lsl #3]
; CHECK-NEXT: mov x12, x3
; CHECK-NEXT: .LBB1_2: // %for.body4
; CHECK-NEXT: // Parent Loop BB1_1 Depth=1
; CHECK-NEXT: // => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldr x13, [x11], #8
; CHECK-NEXT: ldr w13, [x13]
; CHECK-NEXT: cmp w13, w10
; CHECK-NEXT: cinc x8, x8, eq
; CHECK-NEXT: subs x12, x12, #1
; CHECK-NEXT: b.ne .LBB1_2
; CHECK-NEXT: // %bb.3: // %for.cond1.for.exit3_crit_edge
; CHECK-NEXT: // in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: add x9, x9, #1
; CHECK-NEXT: cmp x9, x2
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.4: // %for.exit
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
; Counts the elements a[i][j], for i in [0, N) and j in [0, M), whose first 4
; bytes equal the 4 bytes at %b, and returns that count.
entry:
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %entry, %for.cond1.for.exit3_crit_edge
; Outer loop header: %i.019 is the outer index, %sum.018 carries the count
; across outer iterations.
%i.019 = phi i64 [ %inc7, %for.cond1.for.exit3_crit_edge ], [ 0, %entry ]
%sum.018 = phi i64 [ %spec.select, %for.cond1.for.exit3_crit_edge ], [ 0, %entry ]
; Row pointer a[i]; it depends only on the outer index, so it is loaded here
; rather than in the inner loop.
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.019
%0 = load ptr, ptr %arrayidx, align 8
br label %for.body4
for.body4: ; preds = %for.cond1.preheader, %for.body4
; Inner loop: %j.016 is the inner index, %sum.115 the running count.
%j.016 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ]
%sum.115 = phi i64 [ %sum.018, %for.cond1.preheader ], [ %spec.select, %for.body4 ]
%arrayidx5 = getelementptr inbounds ptr, ptr %0, i64 %j.016
%1 = load ptr, ptr %arrayidx5, align 8
; %b is invariant in both loops. The expected assembly above has the 32-bit
; load from %b (x1) in the entry block, i.e. outside the outer loop as well.
%bcmp = tail call i32 @bcmp(ptr %1, ptr %b, i64 4)
%tobool = icmp eq i32 %bcmp, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.115, %add
%inc = add nuw i64 %j.016, 1
%exitcond = icmp eq i64 %inc, %M
br i1 %exitcond, label %for.cond1.for.exit3_crit_edge, label %for.body4
for.cond1.for.exit3_crit_edge: ; preds = %for.body4
; Outer loop latch: advance i and test against N.
%inc7 = add nuw i64 %i.019, 1
%exitcond22 = icmp eq i64 %inc7, %N
br i1 %exitcond22, label %for.exit, label %for.cond1.preheader
for.exit: ; preds = %for.cond1.for.exit3_crit_edge
ret i64 %spec.select
}
; Same but loop is three dimensional. Load is hoisted outside of all three loops
; for (int i = 0; i < N; ++i)
; for (int j = 0; j < M; ++j)
; for (int k = 0; k < K; ++k)
; if (!memcmp(a[i][j][k], b, 4))
; sum += 1;
;
define i64 @three_dimensional(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
; CHECK-LABEL: three_dimensional:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w10, [x1]
; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB2_1: // %for.cond1.preheader
; CHECK-NEXT: // =>This Loop Header: Depth=1
; CHECK-NEXT: // Child Loop BB2_2 Depth 2
; CHECK-NEXT: // Child Loop BB2_3 Depth 3
; CHECK-NEXT: ldr x11, [x0, x9, lsl #3]
; CHECK-NEXT: mov x12, xzr
; CHECK-NEXT: .LBB2_2: // %for.cond5.preheader
; CHECK-NEXT: // Parent Loop BB2_1 Depth=1
; CHECK-NEXT: // => This Loop Header: Depth=2
; CHECK-NEXT: // Child Loop BB2_3 Depth 3
; CHECK-NEXT: ldr x13, [x11, x12, lsl #3]
; CHECK-NEXT: mov x14, x4
; CHECK-NEXT: .LBB2_3: // %for.body8
; CHECK-NEXT: // Parent Loop BB2_1 Depth=1
; CHECK-NEXT: // Parent Loop BB2_2 Depth=2
; CHECK-NEXT: // => This Inner Loop Header: Depth=3
; CHECK-NEXT: ldr x15, [x13], #8
; CHECK-NEXT: ldr w15, [x15]
; CHECK-NEXT: cmp w15, w10
; CHECK-NEXT: cinc x8, x8, eq
; CHECK-NEXT: subs x14, x14, #1
; CHECK-NEXT: b.ne .LBB2_3
; CHECK-NEXT: // %bb.4: // %for.cond5.for.cond
; CHECK-NEXT: // in Loop: Header=BB2_2 Depth=2
; CHECK-NEXT: add x12, x12, #1
; CHECK-NEXT: cmp x12, x3
; CHECK-NEXT: b.ne .LBB2_2
; CHECK-NEXT: // %bb.5: // %for.cond1.for.cond
; CHECK-NEXT: // in Loop: Header=BB2_1 Depth=1
; CHECK-NEXT: add x9, x9, #1
; CHECK-NEXT: cmp x9, x2
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.6: // %for.exit
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
; Counts the elements a[i][j][k], for i in [0, N), j in [0, M) and k in [0, K),
; whose first 4 bytes equal the 4 bytes at %b, and returns that count.
entry:
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond
; Outermost loop header (index %i.033); loads the plane pointer a[i].
%i.033 = phi i64 [ %inc15, %for.cond1.for.cond ], [ 0, %entry ]
%sum.032 = phi i64 [ %spec.select, %for.cond1.for.cond ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.033
%0 = load ptr, ptr %arrayidx, align 8
br label %for.cond5.preheader
for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader
; Middle loop header (index %j.029); loads the row pointer a[i][j].
%j.029 = phi i64 [ 0, %for.cond1.preheader ], [ %inc12, %for.cond5.for.cond ]
%sum.128 = phi i64 [ %sum.032, %for.cond1.preheader ], [ %spec.select, %for.cond5.for.cond ]
%arrayidx9 = getelementptr inbounds ptr, ptr %0, i64 %j.029
%1 = load ptr, ptr %arrayidx9, align 8
br label %for.body8
for.body8: ; preds = %for.body8, %for.cond5.preheader
; Innermost loop (index %k.026); %sum.225 is the running count.
%k.026 = phi i64 [ 0, %for.cond5.preheader ], [ %inc, %for.body8 ]
%sum.225 = phi i64 [ %sum.128, %for.cond5.preheader ], [ %spec.select, %for.body8 ]
%arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.026
%2 = load ptr, ptr %arrayidx10, align 8
; %b is invariant in all three loops. The expected assembly above has the
; 32-bit load from %b (x1) in the entry block, outside the whole loop nest.
%bcmp = tail call i32 @bcmp(ptr %2, ptr %b, i64 4)
%tobool = icmp eq i32 %bcmp, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.225, %add
%inc = add nuw i64 %k.026, 1
%exitcond = icmp eq i64 %inc, %K
br i1 %exitcond, label %for.cond5.for.cond, label %for.body8
for.cond5.for.cond: ; preds = %for.body8
; Middle loop latch: advance j and test against M.
%inc12 = add nuw i64 %j.029, 1
%exitcond44 = icmp eq i64 %inc12, %M
br i1 %exitcond44, label %for.cond1.for.cond, label %for.cond5.preheader
for.cond1.for.cond: ; preds = %for.cond5.for.cond
; Outermost loop latch: advance i and test against N.
%inc15 = add nuw i64 %i.033, 1
%exitcond45 = icmp eq i64 %inc15, %N
br i1 %exitcond45, label %for.exit, label %for.cond1.preheader
for.exit: ; preds = %for.cond1.for.cond
ret i64 %spec.select
}
; Three dimensional loop but `b[j]` is invariant only relative to the innermost loop.
; Make sure that the load is hoisted out of the innermost loop only
; for (int i = 0; i < N; ++i)
; for (int j = 0; j < M; ++j)
; for (int k = 0; k < K; ++k)
; if (!memcmp(a[i][j][k], b[j], 4))
; sum += 1;
;
define i64 @three_dimensional_middle(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
; CHECK-LABEL: three_dimensional_middle:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB3_1: // %for.cond1.preheader
; CHECK-NEXT: // =>This Loop Header: Depth=1
; CHECK-NEXT: // Child Loop BB3_2 Depth 2
; CHECK-NEXT: // Child Loop BB3_3 Depth 3
; CHECK-NEXT: ldr x10, [x0, x9, lsl #3]
; CHECK-NEXT: mov x11, xzr
; CHECK-NEXT: .LBB3_2: // %for.cond5.preheader
; CHECK-NEXT: // Parent Loop BB3_1 Depth=1
; CHECK-NEXT: // => This Loop Header: Depth=2
; CHECK-NEXT: // Child Loop BB3_3 Depth 3
; CHECK-NEXT: ldr x13, [x1, x11, lsl #3]
; CHECK-NEXT: ldr x12, [x10, x11, lsl #3]
; CHECK-NEXT: mov x14, x4
; CHECK-NEXT: ldr w13, [x13]
; CHECK-NEXT: .LBB3_3: // %for.body8
; CHECK-NEXT: // Parent Loop BB3_1 Depth=1
; CHECK-NEXT: // Parent Loop BB3_2 Depth=2
; CHECK-NEXT: // => This Inner Loop Header: Depth=3
; CHECK-NEXT: ldr x15, [x12], #8
; CHECK-NEXT: ldr w15, [x15]
; CHECK-NEXT: cmp w15, w13
; CHECK-NEXT: cinc x8, x8, eq
; CHECK-NEXT: subs x14, x14, #1
; CHECK-NEXT: b.ne .LBB3_3
; CHECK-NEXT: // %bb.4: // %for.cond5.for.cond
; CHECK-NEXT: // in Loop: Header=BB3_2 Depth=2
; CHECK-NEXT: add x11, x11, #1
; CHECK-NEXT: cmp x11, x3
; CHECK-NEXT: b.ne .LBB3_2
; CHECK-NEXT: // %bb.5: // %for.cond1.for.cond
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT: add x9, x9, #1
; CHECK-NEXT: cmp x9, x2
; CHECK-NEXT: b.ne .LBB3_1
; CHECK-NEXT: // %bb.6: // %for.exit
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
; Counts the elements a[i][j][k] whose first 4 bytes equal the 4 bytes at
; b[j], and returns that count. The comparison operand changes with j.
entry:
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond
; Outermost loop header (index %i.035); loads the plane pointer a[i].
%i.035 = phi i64 [ %inc16, %for.cond1.for.cond ], [ 0, %entry ]
%sum.034 = phi i64 [ %spec.select, %for.cond1.for.cond ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.035
%0 = load ptr, ptr %arrayidx, align 8
br label %for.cond5.preheader
for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader
; Middle loop header (index %j.031); loads both a[i][j] and b[j].
%j.031 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.cond5.for.cond ]
%sum.130 = phi i64 [ %sum.034, %for.cond1.preheader ], [ %spec.select, %for.cond5.for.cond ]
%arrayidx9 = getelementptr inbounds ptr, ptr %0, i64 %j.031
%1 = load ptr, ptr %arrayidx9, align 8
; %2 = b[j] depends on the middle index, so it is invariant only in the
; innermost loop.
%arrayidx11 = getelementptr inbounds ptr, ptr %b, i64 %j.031
%2 = load ptr, ptr %arrayidx11, align 8
br label %for.body8
for.body8: ; preds = %for.body8, %for.cond5.preheader
; Innermost loop (index %k.028); %sum.227 is the running count.
%k.028 = phi i64 [ 0, %for.cond5.preheader ], [ %inc, %for.body8 ]
%sum.227 = phi i64 [ %sum.130, %for.cond5.preheader ], [ %spec.select, %for.body8 ]
%arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.028
%3 = load ptr, ptr %arrayidx10, align 8
; The 32-bit load through %2 produced by expanding this bcmp can leave only the
; innermost loop: in the expected assembly above it sits in the block for
; %for.cond5.preheader (the middle loop body), not in the entry block.
%bcmp = tail call i32 @bcmp(ptr %3, ptr %2, i64 4)
%tobool = icmp eq i32 %bcmp, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.227, %add
%inc = add nuw i64 %k.028, 1
%exitcond = icmp eq i64 %inc, %K
br i1 %exitcond, label %for.cond5.for.cond, label %for.body8
for.cond5.for.cond: ; preds = %for.body8
; Middle loop latch: advance j and test against M.
%inc13 = add nuw i64 %j.031, 1
%exitcond46 = icmp eq i64 %inc13, %M
br i1 %exitcond46, label %for.cond1.for.cond, label %for.cond5.preheader
for.cond1.for.cond: ; preds = %for.cond5.for.cond
; Outermost loop latch: advance i and test against N.
%inc16 = add nuw i64 %i.035, 1
%exitcond47 = icmp eq i64 %inc16, %N
br i1 %exitcond47, label %for.exit, label %for.cond1.preheader
for.exit: ; preds = %for.cond1.for.cond
ret i64 %spec.select
}
; Make sure that store inside loop prevents hoisting invariant loads
; for (int i = 0; i < N; ++i)
; c[i] = memcmp(a[i], b, 4);
;
define void @one_dimensional_with_store(ptr %a, ptr %b, ptr %c, i32 %N) {
; CHECK-LABEL: one_dimensional_with_store:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, w3
; CHECK-NEXT: .LBB4_1: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr x9, [x0], #8
; CHECK-NEXT: ldr w10, [x1]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: rev w10, w10
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: cset w9, hi
; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: sub w9, w9, w10
; CHECK-NEXT: strb w9, [x2], #1
; CHECK-NEXT: b.ne .LBB4_1
; CHECK-NEXT: // %bb.2: // %for.exit
; CHECK-NEXT: ret
; Stores the truncated result of a 4-byte memcmp(a[i], b) into c[i] for each i
; in [0, N). Negative case: the load from %b must stay inside the loop.
entry:
br label %for.body.preheader
for.body.preheader: ; preds = %entry
; %N is i32 in this test; widen it to match the 64-bit induction variable.
%wide.trip.count = zext i32 %N to i64
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv
%0 = load ptr, ptr %arrayidx, align 8
; memcmp rather than bcmp: the three-way result is stored, not just tested
; against zero.
%call = tail call i32 @memcmp(ptr %0, ptr %b, i64 4)
%conv = trunc i32 %call to i8
%arrayidx2 = getelementptr inbounds i8, ptr %c, i64 %indvars.iv
; Nothing in the signature rules out %c aliasing %b, so this store may change
; the bytes at %b between iterations. The expected assembly above accordingly
; keeps the load from %b (x1) inside the loop body.
store i8 %conv, ptr %arrayidx2, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.exit, label %for.body
for.exit: ; preds = %for.body
ret void
}
; Make sure that call inside loop prevents hoisting invariant loads
; for (int i = 0; i < N; ++i) {
; if (!memcmp(a[i], b, 4))
; sum += 1;
; func();
; }
;
define i32 @one_dimensional_with_call(ptr %a, ptr %b, i32 %N) {
; CHECK-LABEL: one_dimensional_with_call:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w30, -48
; CHECK-NEXT: mov x19, x1
; CHECK-NEXT: mov x21, x0
; CHECK-NEXT: mov w20, wzr
; CHECK-NEXT: mov w22, w2
; CHECK-NEXT: .LBB5_1: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr x8, [x21], #8
; CHECK-NEXT: ldr w9, [x19]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cinc w20, w20, eq
; CHECK-NEXT: bl func
; CHECK-NEXT: subs x22, x22, #1
; CHECK-NEXT: b.ne .LBB5_1
; CHECK-NEXT: // %bb.2: // %for.exit
; CHECK-NEXT: mov w0, w20
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; CHECK-NEXT: ret
; Counts the elements a[i], for i in [0, N), whose first 4 bytes equal the 4
; bytes at %b, calling @func once per iteration. Negative case: the load from
; %b must stay inside the loop.
entry:
br label %for.body.preheader
for.body.preheader: ; preds = %entry
; %N is i32 in this test; widen it to match the 64-bit induction variable.
%wide.trip.count = zext i32 %N to i64
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%sum.05 = phi i32 [ 0, %for.body.preheader ], [ %spec.select, %for.body ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv
%0 = load ptr, ptr %arrayidx, align 8
%bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4)
%tobool.not = icmp eq i32 %bcmp, 0
%add = zext i1 %tobool.not to i32
%spec.select = add nuw nsw i32 %sum.05, %add
; @func is an external declaration with no attributes in this file, so nothing
; says it cannot write the memory at %b. The expected assembly above keeps the
; load from %b (held in x19 across the call) inside the loop body.
tail call void @func()
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.exit, label %for.body
for.exit: ; preds = %for.body
ret i32 %spec.select
}
; One dimensional loop with memcmp size equal six.
; The test shows that several loads can be hoisted at the same time.
; for (int i = 0; i < N; ++i)
; if (!memcmp(a[i], b, 6))
; sum += 1;
;
define i64 @one_dimensional_two_loads(ptr %a, ptr %b, i64 %N) {
; CHECK-LABEL: one_dimensional_two_loads:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: ldrh w10, [x1, #4]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB6_1: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr x11, [x0], #8
; CHECK-NEXT: ldr w12, [x11]
; CHECK-NEXT: ldrh w11, [x11, #4]
; CHECK-NEXT: cmp w12, w9
; CHECK-NEXT: ccmp w11, w10, #0, eq
; CHECK-NEXT: cinc x8, x8, eq
; CHECK-NEXT: subs x2, x2, #1
; CHECK-NEXT: b.ne .LBB6_1
; CHECK-NEXT: // %bb.2: // %for.exit
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
; Counts the elements a[i], for i in [0, N), whose first 6 bytes equal the 6
; bytes at %b, and returns that count.
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
; %i.06 is the induction variable; %sum.05 is the running count of matches.
%i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06
%0 = load ptr, ptr %arrayidx, align 8
; A 6-byte compare needs two loads per operand. In the expected assembly above
; these are a 4-byte ldr plus a 2-byte ldrh at offset 4, and both loads from %b
; (x1) are in the entry block, ahead of the loop.
%bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 6)
%tobool = icmp eq i32 %bcmp, 0
%add = zext i1 %tobool to i64
%spec.select = add i64 %sum.05, %add
%inc = add nuw i64 %i.06, 1
%exitcond = icmp eq i64 %inc, %N
br i1 %exitcond, label %for.exit, label %for.body
for.exit: ; preds = %for.body
ret i64 %spec.select
}
; See issue https://github.com/llvm/llvm-project/issues/72855
;
; When hoisting an instruction out of the loop, ensure that hoisted loads are
; not removed by common subexpression elimination. In this example pointer %c
; may alias pointer %b, so when hoisting the `%y = load i64, ptr %b` instruction
; we can't replace it with the earlier `%b.val = load i64, ptr %b`
;
define i64 @hoisting_no_cse(ptr %a, ptr %b, ptr %c, i64 %N) {
; CHECK-LABEL: hoisting_no_cse:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr x8, [x1]
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: str x8, [x2]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: .LBB7_1: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr x10, [x0], #8
; CHECK-NEXT: ldr x10, [x10]
; CHECK-NEXT: cmp x10, x9
; CHECK-NEXT: cinc x8, x8, eq
; CHECK-NEXT: subs x3, x3, #1
; CHECK-NEXT: b.ne .LBB7_1
; CHECK-NEXT: // %bb.2: // %for.exit
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
; Writes *b + 1 to *c, then counts the elements a[i], for i in [0, N), whose
; pointed-to i64 equals the i64 at %b, and returns that count.
entry:
; First load from %b, followed by a store through %c. Nothing in the signature
; rules out %c aliasing %b, so the store may change the value at %b.
%b.val = load i64, ptr %b
%b.val.changed = add i64 %b.val, 1
store i64 %b.val.changed, ptr %c
br label %for.body
for.body: ; preds = %entry, %for.body
; %idx is the induction variable; %sum is the running count of matches.
%idx = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%sum = phi i64 [ %spec.select, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds ptr, ptr %a, i64 %idx
%0 = load ptr, ptr %arrayidx, align 8
%x = load i64, ptr %0
; Second, loop-invariant load from %b. It can be hoisted ahead of the loop, but
; it must remain a separate load placed after the store through %c. The
; expected assembly above has two loads from x1 in the entry block, one on
; each side of the str.
%y = load i64, ptr %b
%cmp = icmp eq i64 %x, %y
%add = zext i1 %cmp to i64
%spec.select = add i64 %sum, %add
%inc = add nuw i64 %idx, 1
%exitcond = icmp eq i64 %inc, %N
br i1 %exitcond, label %for.exit, label %for.body
for.exit: ; preds = %for.body
ret i64 %spec.select
}
declare i32 @bcmp(ptr, ptr, i64)
declare i32 @memcmp(ptr, ptr, i64)
declare void @func()