; llvm/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='loop(indvars),instcombine' -replexitval=always -S < %s | FileCheck %s

;; Test that the loop's exit value is rewritten to the initial value it
;; receives from the loop preheader.
;;
;; %cond is computed once in the entry block, so it has the same value on
;; every iteration: if the exit edge out of %header is ever taken, it is taken
;; on the first visit, where %phi_indvar is still 0. The assertions below
;; therefore expect the induction variable to disappear and the function to
;; end in a plain `ret i32 0`.
define i32 @test1(ptr %var) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[VAR:%.*]], null
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       header:
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP:%.*]], label [[EXIT:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    br label [[HEADER]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
entry:
  ; Exit condition, evaluated once outside the loop.
  %cond = icmp eq ptr %var, null
  br label %header

header:
  ; 0 on entry, previous value + 1 when arriving from the latch.
  %phi_indvar = phi i32 [0, %entry], [%indvar, %loop]
  br i1 %cond, label %loop, label %exit

loop:
  %indvar = add i32 %phi_indvar, 1
  br label %header

exit:
  ; The value used outside the loop is the header phi itself.
  ret i32 %phi_indvar
}

;; Test that we cannot rewrite the loop exit value if it is not a phi node
;; (%indvar, the value returned after the loop, is an add instruction in this
;; test).
;;
;; The loop is a single block: %header is both the header and the latch, and
;; the exit test runs after the increment. The assertions below expect the phi
;; and the add to stay in the loop and the return to keep using the add.
define i32 @test2(ptr %var) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[VAR:%.*]], null
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       header:
; CHECK-NEXT:    [[PHI_INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR:%.*]], [[HEADER]] ]
; CHECK-NEXT:    [[INDVAR]] = add i32 [[PHI_INDVAR]], 1
; CHECK-NEXT:    br i1 [[COND]], label [[HEADER]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 [[INDVAR]]
;
entry:
  ; Exit condition, evaluated once outside the loop.
  %cond = icmp eq ptr %var, null
  br label %header

header:
  %phi_indvar = phi i32 [0, %entry], [%indvar, %header]
  %indvar = add i32 %phi_indvar, 1
  br i1 %cond, label %header, label %exit

exit:
  ; Returns the incremented value, not the header phi.
  ret i32 %indvar
}

;; Test that we cannot rewrite the loop exit value if the exiting condition
;; is not in the loop header.
;;
;; The only edge to %exit leaves from %body, which is reached only when the
;; header's own test (%cond2) fails. The input has two back edges into
;; %header (from %header itself and from %body); the assertions below expect
;; them to be merged through a `header.backedge` block, with the phi kept and
;; still returned from %exit.
define i32 @test3(ptr %var) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND1:%.*]] = icmp eq ptr [[VAR:%.*]], null
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       header:
; CHECK-NEXT:    [[PHI_INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR:%.*]], [[HEADER_BACKEDGE:%.*]] ]
; CHECK-NEXT:    [[INDVAR]] = add i32 [[PHI_INDVAR]], 1
; CHECK-NEXT:    [[COND2:%.*]] = icmp eq i32 [[INDVAR]], 10
; CHECK-NEXT:    br i1 [[COND2]], label [[HEADER_BACKEDGE]], label [[BODY:%.*]]
; CHECK:       header.backedge:
; CHECK-NEXT:    br label [[HEADER]]
; CHECK:       body:
; CHECK-NEXT:    br i1 [[COND1]], label [[HEADER_BACKEDGE]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 [[PHI_INDVAR]]
;
entry:
  ; Condition for the exiting branch in %body, evaluated once outside the loop.
  %cond1 = icmp eq ptr %var, null
  br label %header

header:
  ; Three incoming edges: loop entry plus the two back edges.
  %phi_indvar = phi i32 [0, %entry], [%indvar, %header], [%indvar, %body]
  %indvar = add i32 %phi_indvar, 1
  ; This branch never leaves the loop; it only picks which back-edge path runs.
  %cond2 = icmp eq i32 %indvar, 10
  br i1 %cond2, label %header, label %body

body:
  ; The loop's single exiting branch.
  br i1 %cond1, label %header, label %exit

exit:
  ret i32 %phi_indvar
}


; Multiple exits dominating latch.
;
; Both %header and %loop (the latch) branch to %exit, and both branch
; conditions are function arguments, so neither changes between iterations.
; %exit returns the header phi, which starts at 0; the assertions below expect
; the induction variable to disappear and the return to become `ret i32 0`.
define i32 @test4(i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       header:
; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[HEADER]], label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
entry:
  br label %header

header:
  %phi_indvar = phi i32 [0, %entry], [%indvar, %loop]
  ; First exit: leaves the loop when %cond1 is false.
  br i1 %cond1, label %loop, label %exit

loop:
  %indvar = add i32 %phi_indvar, 1
  ; Second exit: leaves the loop when %cond2 is false.
  br i1 %cond2, label %header, label %exit

exit:
  ret i32 %phi_indvar
}

; A conditionally executed exit.
;
; The only exiting branch lives in %maybe, which is reached only on iterations
; where the volatile load in %header yields false. That load is repeated on
; every iteration, so its result is not known to be the same each time. The
; assertions below expect the loop to be left as written: the phi and the add
; remain, and %exit still returns the phi.
define i32 @test5(ptr %addr, i1 %cond2) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       header:
; CHECK-NEXT:    [[PHI_INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR:%.*]], [[LOOP:%.*]] ]
; CHECK-NEXT:    [[COND1:%.*]] = load volatile i1, ptr [[ADDR:%.*]], align 1
; CHECK-NEXT:    br i1 [[COND1]], label [[LOOP]], label [[MAYBE:%.*]]
; CHECK:       maybe:
; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR]] = add i32 [[PHI_INDVAR]], 1
; CHECK-NEXT:    br label [[HEADER]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 [[PHI_INDVAR]]
;
entry:
  br label %header

header:
  %phi_indvar = phi i32 [0, %entry], [%indvar, %loop]
  ; Re-read on every iteration; decides whether %maybe runs at all.
  %cond1 = load volatile i1, ptr %addr
  br i1 %cond1, label %loop, label %maybe

maybe:
  ; The loop's single exiting branch, guarded by the volatile load above.
  br i1 %cond2, label %loop, label %exit

loop:
  %indvar = add i32 %phi_indvar, 1
  br label %header

exit:
  ret i32 %phi_indvar
}

; Exit value of one induction variable in a loop whose exit test is driven by
; another. (NOTE(review): the name suggests a reduced reproducer for an issue
; numbered 57336 - confirm against the tracker.)
;
; The loop carries an i16 counter (%inc8) and an i32 counter (%inc137), both
; starting at 0 and both incremented by 1 per iteration. The exit test only
; involves the i16 counter: the loop ends once %end < %m * %inc8 (signed).
; The function returns the i32 counter's value at exit, truncated to i16.
;
; The assertions below expect the i32 counter and the trunc to be removed, and
; the returned value to be recomputed in %crit_edge from %end and %m with an
; smax / umax / udiv expression.
define i16 @pr57336(i16 %end, i16 %m) mustprogress {
; CHECK-LABEL: @pr57336(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INC8:%.*]] = phi i16 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[INC8]], 1
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i16 [[M:%.*]], [[INC8]]
; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp slt i16 [[END:%.*]], [[MUL]]
; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[CRIT_EDGE:%.*]], label [[FOR_BODY]]
; CHECK:       crit_edge:
; CHECK-NEXT:    [[TMP0:%.*]] = add i16 [[END]], 1
; CHECK-NEXT:    [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP0]], i16 0)
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i16 [[END]], 32767
; CHECK-NEXT:    [[UMIN:%.*]] = zext i1 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i16 [[SMAX]], [[UMIN]]
; CHECK-NEXT:    [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[M]], i16 1)
; CHECK-NEXT:    [[TMP3:%.*]] = udiv i16 [[TMP2]], [[UMAX]]
; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[TMP3]], [[UMIN]]
; CHECK-NEXT:    ret i16 [[TMP4]]
;
entry:
  br label %for.body

for.body:
  ; i16 counter: feeds the exit test.
  %inc8 = phi i16 [ %inc, %for.body ], [ 0, %entry ]
  ; i32 counter: only used after the loop.
  %inc137 = phi i32 [ %inc1, %for.body ], [ 0, %entry ]
  %inc1 = add nsw i32 %inc137, 1
  %inc = add nsw i16 %inc8, 1
  ; Uses the counter's value from the start of this iteration.
  %mul = mul nsw i16 %m, %inc8
  %cmp.not = icmp slt i16 %end, %mul
  br i1 %cmp.not, label %crit_edge, label %for.body

crit_edge:
  ; Single-input phi carrying the i32 counter out of the loop.
  %inc137.lcssa = phi i32 [ %inc137, %for.body ]
  %conv = trunc i32 %inc137.lcssa to i16
  ret i16 %conv
}

; Exit value of a umin-capped step in a loop that strides by a multiple of
; vscale.
;
; %VF is vscale shifted left by 2, and the function carries
; vscale_range(2,1024). The loop is entered only when %n > 0 (signed). Each
; iteration computes umin(%VF, %n - %i.05), stores it through %A, advances
; %i.05 by %VF, and continues while the advanced value is < %n (signed). The
; function returns the umin from the final iteration.
;
; The assertions below expect the loop body to stay and the returned value to
; be recomputed in %for.end as
;   umin(VF, n - (((n - 1) udiv VF) * vscale << 2)).
define i32 @vscale_slt_with_vp_umin(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: @vscale_slt_with_vp_umin(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[VF:%.*]] = shl nuw nsw i32 [[VSCALE]], 2
; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[EARLY_EXIT:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       early.exit:
; CHECK-NEXT:    ret i32 0
; CHECK:       for.body:
; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[LEFT:%.*]] = sub nsw i32 [[N]], [[I_05]]
; CHECK-NEXT:    [[VF_CAPPED:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[LEFT]])
; CHECK-NEXT:    store i32 [[VF_CAPPED]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_05]], [[VF]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[N]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = udiv i32 [[TMP0]], [[VF]]
; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[TMP1]], [[VSCALE]]
; CHECK-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP2]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[N]], [[TMP3]]
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[TMP4]])
; CHECK-NEXT:    ret i32 [[UMIN]]
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  ; Loop stride: vscale * 4.
  %VF = shl nuw nsw i32 %vscale, 2
  ; Loop guard: skip the loop entirely unless %n is positive.
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %early.exit

early.exit:
  ret i32 0

for.body:
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  ; NOTE(review): %arrayidx is never used; the store below writes through %A
  ; directly. The expected output above matches that, so leave it as is unless
  ; the assertions are regenerated.
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  ; Distance from the current index to %n, capped at the stride.
  %left = sub i32 %n, %i.05
  %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
  store i32 %VF.capped, ptr %A

  %add = add nsw i32 %i.05, %VF
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ; Loop-defined value used after the loop; this is the exit value under test.
  ret i32 %VF.capped
}

; Variant of @vscale_slt_with_vp_umin with a different loop guard: the loop is
; entered only when %n > %VF (signed) rather than when %n > 0. The loop body
; and the returned value (the umin from the final iteration) are otherwise the
; same.
;
; The assertions below expect the same recomputation of the returned value in
; %for.end. Unlike the sibling test, they expect no `nsw` on the in-loop
; `sub` or on the `add ..., -1` in %for.end.
define i32 @vscale_slt_with_vp_umin2(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: @vscale_slt_with_vp_umin2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[VF:%.*]] = shl nuw nsw i32 [[VSCALE]], 2
; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], [[VF]]
; CHECK-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[EARLY_EXIT:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       early.exit:
; CHECK-NEXT:    ret i32 0
; CHECK:       for.body:
; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[LEFT:%.*]] = sub i32 [[N]], [[I_05]]
; CHECK-NEXT:    [[VF_CAPPED:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[LEFT]])
; CHECK-NEXT:    store i32 [[VF_CAPPED]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_05]], [[VF]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = udiv i32 [[TMP0]], [[VF]]
; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[TMP1]], [[VSCALE]]
; CHECK-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP2]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[N]], [[TMP3]]
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[TMP4]])
; CHECK-NEXT:    ret i32 [[UMIN]]
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  ; Loop stride: vscale * 4.
  %VF = shl nuw nsw i32 %vscale, 2
  ; Loop guard: skip the loop unless %n exceeds one full stride.
  %cmp4 = icmp sgt i32 %n, %VF
  br i1 %cmp4, label %for.body, label %early.exit

early.exit:
  ret i32 0

for.body:
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  ; NOTE(review): %arrayidx is never used; the store below writes through %A
  ; directly. The expected output above matches that, so leave it as is unless
  ; the assertions are regenerated.
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  ; Distance from the current index to %n, capped at the stride.
  %left = sub i32 %n, %i.05
  %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
  store i32 %VF.capped, ptr %A

  %add = add nsw i32 %i.05, %VF
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ; Loop-defined value used after the loop; this is the exit value under test.
  ret i32 %VF.capped
}