; llvm/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s

; @f carries the `convergent` attribute; it is the call placed inside the
; loops of the test functions in this file.
declare void @f() convergent
; @g is declared without `convergent`. No call to it appears in the functions
; visible in this file.
declare void @g()

; Although this loop contains a convergent instruction, it should be
; fully unrolled.
define i32 @full_unroll() {
; CHECK-LABEL: @full_unroll(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    br label [[L3:%.*]]
; CHECK:       l3:
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    br label [[A:%.*]]
; CHECK:       a:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_1:%.*]]
; CHECK:       a.1:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_2:%.*]]
; CHECK:       a.2:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    ret i32 0
;
; Input IR: a two-block loop (%l3 -> %a -> %l3) with a constant exit bound.
entry:
  ; Convergence token created once, before the loop is entered.
  %anchor = call token @llvm.experimental.convergence.anchor()
  br label %l3

l3:
  ; Header: %inc takes the values 1, 2, 3 and the exit condition is %inc == 3,
  ; so the body executes a constant three times.
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
  ; Per-iteration token derived from the anchor created in %entry.
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, 3
  br label %a

a:
  ; Latch: the convergent call is controlled by the token from the header.
  ; The expected output above contains three copies of this call (a, a.1, a.2)
  ; that all use the single remaining convergence.loop token.
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  br i1 %exitcond, label %exit, label %l3

exit:
  ret i32 0
}

; This loop contains a convergent instruction, but it should be partially
; unrolled.  The unroll count is the largest power of 2 that divides the
; trip multiple (12) -- 4, in this case.
define i32 @runtime_unroll(i32 %n) {
; CHECK-LABEL: @runtime_unroll(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 12
; CHECK-NEXT:    br label [[L3:%.*]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_3:%.*]], [[A_3:%.*]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    br label [[A:%.*]]
; CHECK:       a:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_1:%.*]]
; CHECK:       a.1:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_2:%.*]]
; CHECK:       a.2:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_3]]
; CHECK:       a.3:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[INC_3]] = add nsw i32 [[X_0]], 4
; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[LOOP_CTL]]
; CHECK-NEXT:    br i1 [[EXITCOND_3]], label [[EXIT:%.*]], label [[L3]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
; Input IR: the loop carries no unroll metadata; its exit bound is computed
; from the argument %n at run time.
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  ; Exit bound is %n * 12: its value is unknown at compile time, but it is
  ; built as a multiple of 12.
  %loop_ctl = mul nsw i32 %n, 12
  br label %l3

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
  ; Per-iteration token derived from the anchor created in %entry.
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  br label %a

a:
  ; The expected output above has four copies of this call (a .. a.3), a
  ; combined increment of 4, and no remainder loop.
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %loop_ctl
  br i1 %exitcond, label %exit, label %l3

exit:
  ret i32 0
}

; This loop contains a convergent instruction, so its partial unroll
; count must divide its trip multiple.  This overrides its unroll
; pragma -- we unroll exactly 8 times, even though 16 is requested.
define i32 @pragma_unroll(i32 %n) {
; CHECK-LABEL: @pragma_unroll(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 24
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_7:%.*]], [[A_7:%.*]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    br label [[A:%.*]]
; CHECK:       a:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_1:%.*]]
; CHECK:       a.1:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_2:%.*]]
; CHECK:       a.2:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_3:%.*]]
; CHECK:       a.3:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_4:%.*]]
; CHECK:       a.4:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_5:%.*]]
; CHECK:       a.5:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_6:%.*]]
; CHECK:       a.6:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_7]]
; CHECK:       a.7:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[INC_7]] = add nsw i32 [[X_0]], 8
; CHECK-NEXT:    [[EXITCOND_7:%.*]] = icmp eq i32 [[INC_7]], [[LOOP_CTL]]
; CHECK-NEXT:    br i1 [[EXITCOND_7]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
; Input IR: same loop shape as @runtime_unroll, but the bound is %n * 24 and
; the branches carry metadata !0, which requests an unroll count of 16.
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  ; Exit bound is built as a multiple of 24; its value depends on %n.
  %loop_ctl = mul nsw i32 %n, 24
  br label %l3, !llvm.loop !0

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
  ; Per-iteration token derived from the anchor created in %entry.
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  br label %a

a:
  ; The expected output above has eight copies of this call (a .. a.7) and a
  ; combined increment of 8, not the sixteen requested by !0.
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %loop_ctl
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !0

exit:
  ret i32 0
}

; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 divides the trip count 4, the loop unroll should respect the pragma.
define void @pragma_unroll_divisible_trip_count() {
; CHECK-LABEL: @pragma_unroll_divisible_trip_count(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_1:%.*]], [[L3]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[X_0]], 2
; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 4
; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; Input IR: single-block loop with a constant exit bound of 4; the branches
; carry metadata !1, which requests an unroll count of 2.
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  ; Per-iteration token derived from the anchor created in %entry.
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  ; The expected output above has two copies of this call per iteration and an
  ; increment of 2, with the loop itself kept.
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, 4
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ret void
}

; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 divides the trip multiple 2, the loop unroll should respect the
; pragma.
define i32 @pragma_unroll_divisible_trip_multiple(i32 %n) {
; CHECK-LABEL: @pragma_unroll_divisible_trip_multiple(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 2
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_1:%.*]], [[L3]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[INC_1]] = add nsw i32 [[X_0]], 2
; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], [[LOOP_CTL]]
; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
; Input IR: single-block loop whose exit bound is %n * 2; the branches carry
; metadata !1, which requests an unroll count of 2.
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  ; Exit bound is built as a multiple of 2; its value depends on %n.
  %loop_ctl = mul nsw i32 %n, 2
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  ; Per-iteration token derived from the anchor created in %entry.
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  ; The expected output above has two copies of this call per iteration and an
  ; increment of 2, with no remainder loop.
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %loop_ctl
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ret i32 0
}

; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 is not known to divide the runtime trip count, the loop is not
; unrolled, because a remainder loop is forbidden when unrolling a convergent
; loop.
define i32 @pragma_unroll_indivisible_runtime_trip_count(i32 %n) {
; CHECK-LABEL: @pragma_unroll_indivisible_runtime_trip_count(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[L3]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[INC]] = add nsw i32 [[X_0]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP4]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
; Input IR: single-block loop whose exit bound is the raw argument %n; the
; branches carry metadata !1, which requests an unroll count of 2.
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  ; Per-iteration token derived from the anchor created in %entry.
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  ; The expected output above keeps a single copy of this call and an
  ; increment of 1, i.e. the loop body is left as written.
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  ; The bound is %n itself, so nothing here fixes a factor of the trip count.
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ret i32 0
}

; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 does not divide the trip count 5, the loop is not unrolled by 2,
; because a remainder loop is forbidden when unrolling a convergent loop.
; Instead, the loop gets fully unrolled.
define i32 @pragma_unroll_indivisible_trip_count() {
; CHECK-LABEL: @pragma_unroll_indivisible_trip_count(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l3:
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    ret i32 0
;
; Input IR: single-block loop with a constant exit bound of 5; the branches
; carry metadata !1, which requests an unroll count of 2.
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  ; Per-iteration token derived from the anchor created in %entry.
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  ; The expected output above has five straight-line copies of this call
  ; followed directly by the return, with no back-edge left.
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, 5
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ret i32 0
}

; This loop contains a convergent instruction that is anchored inside the loop
; itself. It is unrolled by 2 with remainder, as requested by the loop metadata.
define i32 @pragma_unroll_with_remainder(i32 %n) {
; CHECK-LABEL: @pragma_unroll_with_remainder(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[N:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], -1
; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP0]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 1
; CHECK-NEXT:    br i1 [[TMP2]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK:       entry.new:
; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[INC_1:%.*]], [[L3]] ]
; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[L3]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[TOK_LOOP_1:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP_1]]) ]
; CHECK-NEXT:    [[INC_1]] = add nsw i32 [[X_0]], 2
; CHECK-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER]], 2
; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[L3]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       exit.unr-lcssa.loopexit:
; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
; CHECK:       exit.unr-lcssa:
; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[L3_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       l3.epil.preheader:
; CHECK-NEXT:    br label [[L3_EPIL:%.*]]
; CHECK:       l3.epil:
; CHECK-NEXT:    [[TOK_LOOP_EPIL:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP_EPIL]]) ]
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
; Input IR: single-block loop bounded by the raw argument %n; the branches
; carry metadata !1, which requests an unroll count of 2. Unlike the tests
; that use convergence.loop, no token enters the loop from outside.
entry:
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  ; The token is a fresh anchor created inside the loop body on every
  ; iteration, and its only use is the call on the next line.
  %tok.loop = call token @llvm.experimental.convergence.anchor()
  ; The expected output above has two anchor/call pairs in the main loop and a
  ; separate l3.epil block holding one more pair for the leftover iteration.
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ret i32 0
}

; Don't unroll a loop that is extended by convergence controls.
;
; We could theoretically duplicate the extension part, but this is not
; implemented.
define i32 @extended_loop(i32 %n) {
; CHECK-LABEL: @extended_loop(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[L3]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    [[INC]] = add nsw i32 [[X_0]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP4]]
; CHECK:       exit:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    ret i32 0
;
; Input IR: single-block loop bounded by %n; the branches carry metadata !1,
; which requests an unroll count of 2. The expected output above is the same
; loop, not unrolled.
entry:
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  ; Token created inside the loop; it has no use inside the loop itself.
  %tok.loop = call token @llvm.experimental.convergence.anchor()
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ; The only use of the loop's token is here, after the loop has exited. This
  ; out-of-loop use is the "extension" the test is about.
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  ret i32 0
}

; Inner loop is extended beyond the outer loop. No unrolling possible.

define i32 @extended_inner_loop_1(i32 %n, i1 %cond) {
; CHECK-LABEL: @extended_inner_loop_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[L3:%.*]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    [[INC]] = add nsw i32 [[X_0]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 4
; CHECK-NEXT:    br label [[L2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2:
; CHECK-NEXT:    [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[L2]], label [[LATCH]], !llvm.loop [[LOOP4]]
; CHECK:       latch:
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]]
; CHECK:       exit:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
; CHECK-NEXT:    ret i32 0
;
; Input IR: an outer loop (%l3 -> %l2 -> %latch -> %l3) with a constant exit
; bound of 4, containing an inner self-loop on %l2 controlled by %cond. The
; expected output above is the same nest with neither loop unrolled.
entry:
  br label %l3

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
  ; This outer-loop token is not used by any call in this function.
  %tok.loop = call token @llvm.experimental.convergence.anchor()
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, 4
  br label %l2, !llvm.loop !1

l2:
  ; Inner-loop token: used by the call below and again in %exit.
  %tok.l2 = call token @llvm.experimental.convergence.anchor()
  call void @f() [ "convergencectrl"(token %tok.l2) ]
  ; Inner back-edge; carries metadata !1 (requested unroll count of 2).
  br i1 %cond, label %l2, label %latch, !llvm.loop !1

latch:
  ; Outer back-edge; it carries no loop metadata.
  br i1 %exitcond, label %exit, label %l3

exit:
  ; Use of the inner loop's token outside both loops.
  call void @f() [ "convergencectrl"(token %tok.l2) ]
  ret i32 0
}

; Inner loop is extended inside the outer loop. Outer loop is unrolled.

define i32 @extended_inner_loop_2(i32 %n, i1 %cond) {
; CHECK-LABEL: @extended_inner_loop_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[L3:%.*]]
; CHECK:       l3:
; CHECK-NEXT:    br label [[L2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2:
; CHECK-NEXT:    [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[L2]], label [[LATCH:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       latch:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
; CHECK-NEXT:    br label [[L2_1:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.1:
; CHECK-NEXT:    [[TOK_L2_1:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_1]], label [[LATCH_1:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       latch.1:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
; CHECK-NEXT:    br label [[L2_2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.2:
; CHECK-NEXT:    [[TOK_L2_2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_2]], label [[LATCH_2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       latch.2:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
; CHECK-NEXT:    br label [[L2_3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.3:
; CHECK-NEXT:    [[TOK_L2_3:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_3]], label [[LATCH_3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       latch.3:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
; CHECK-NEXT:    ret i32 0
;
; Input IR: same nest as @extended_inner_loop_1, except that the second use of
; the inner loop's token is in %latch rather than in %exit. In the expected
; output above the outer loop is replaced by four straight-line copies, while
; each copy of the inner loop remains a self-loop.
entry:
  br label %l3

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
  ; This outer-loop token is not used by any call in this function.
  %tok.loop = call token @llvm.experimental.convergence.anchor()
  %inc = add nsw i32 %x.0, 1
  ; Constant exit bound of 4 for the outer loop.
  %exitcond = icmp eq i32 %inc, 4
  br label %l2, !llvm.loop !1

l2:
  ; Inner-loop token: used by the call below and again in %latch.
  %tok.l2 = call token @llvm.experimental.convergence.anchor()
  call void @f() [ "convergencectrl"(token %tok.l2) ]
  ; Inner back-edge; carries metadata !1 (requested unroll count of 2).
  br i1 %cond, label %l2, label %latch, !llvm.loop !1

latch:
  ; Use of the inner loop's token after the inner loop has exited, but still
  ; inside the outer loop.
  call void @f() [ "convergencectrl"(token %tok.l2) ]
  br i1 %exitcond, label %exit, label %l3

exit:
  ret i32 0
}

; No extension. Both loops unrolled.

define i32 @unroll_nest(i32 %n, i1 %cond) {
; CHECK-LABEL: @unroll_nest(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[L3:%.*]]
; CHECK:       l3:
; CHECK-NEXT:    br label [[L2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2:
; CHECK-NEXT:    [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[L2_1:%.*]], label [[LATCH:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.1:
; CHECK-NEXT:    [[TOK_L2_1:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2]], label [[LATCH]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       latch:
; CHECK-NEXT:    br label [[L2_12:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.12:
; CHECK-NEXT:    [[TOK_L2_11:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_11]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_1_1:%.*]], label [[LATCH_1:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.1.1:
; CHECK-NEXT:    [[TOK_L2_1_1:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1_1]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_12]], label [[LATCH_1]], !llvm.loop [[LOOP9]]
; CHECK:       latch.1:
; CHECK-NEXT:    br label [[L2_2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.2:
; CHECK-NEXT:    [[TOK_L2_2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_1_2:%.*]], label [[LATCH_2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.1.2:
; CHECK-NEXT:    [[TOK_L2_1_2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1_2]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_2]], label [[LATCH_2]], !llvm.loop [[LOOP9]]
; CHECK:       latch.2:
; CHECK-NEXT:    br label [[L2_3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.3:
; CHECK-NEXT:    [[TOK_L2_3:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_1_3:%.*]], label [[LATCH_3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.1.3:
; CHECK-NEXT:    [[TOK_L2_1_3:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1_3]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_3]], label [[LATCH_3]], !llvm.loop [[LOOP9]]
; CHECK:       latch.3:
; CHECK-NEXT:    ret i32 0
;
; Input IR: same nest as the two @extended_inner_loop tests, but the inner
; loop's token is used only inside the inner loop. In the expected output
; above the outer loop becomes four straight-line copies, and each copy of the
; inner loop holds two anchor/call pairs (e.g. l2 and l2.1) per iteration.
entry:
  br label %l3

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
  ; This outer-loop token is not used by any call in this function.
  %tok.loop = call token @llvm.experimental.convergence.anchor()
  %inc = add nsw i32 %x.0, 1
  ; Constant exit bound of 4 for the outer loop.
  %exitcond = icmp eq i32 %inc, 4
  br label %l2, !llvm.loop !1

l2:
  ; Inner-loop token; its only use is the call on the next line.
  %tok.l2 = call token @llvm.experimental.convergence.anchor()
  call void @f() [ "convergencectrl"(token %tok.l2) ]
  ; Inner back-edge; carries metadata !1 (requested unroll count of 2).
  br i1 %cond, label %l2, label %latch, !llvm.loop !1

latch:
  ; Outer back-edge; it carries no loop metadata.
  br i1 %exitcond, label %exit, label %l3

exit:
  ret i32 0
}

; Convergence-control intrinsics used by the functions above. Both return a
; token, which calls consume through a "convergencectrl" operand bundle.
declare token @llvm.experimental.convergence.anchor()
declare token @llvm.experimental.convergence.loop()

; Self-referential loop metadata nodes attached to branches via !llvm.loop:
; !0 requests an unroll count of 16, !1 requests an unroll count of 2.
!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
!1 = !{!1, !{!"llvm.loop.unroll.count", i32 2}}