; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-count=2 | FileCheck %s
; Make sure the loop is unrolled by 2 without a remainder (epilogue) loop when
; the trip count is provably even. The branch on (%p & 1) == 0 guards the loop,
; so %p has a zero least significant bit; llvm.assume states the same for %q;
; and the trip count %n is the signed maximum of the two.
define dso_local void @assumeDivisibleTC(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %p, i32 %q) local_unnamed_addr {
; CHECK-LABEL: @assumeDivisibleTC(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND:%.*]] = and i32 [[P:%.*]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[GUARDED:%.*]], label [[EXIT:%.*]]
; CHECK: guarded:
; CHECK-NEXT: [[REM:%.*]] = urem i32 [[Q:%.*]], 2
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[REM]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP2]])
; CHECK-NEXT: [[GT:%.*]] = icmp sgt i32 [[P]], [[Q]]
; CHECK-NEXT: [[N:%.*]] = select i1 [[GT]], i32 [[P]], i32 [[Q]]
; CHECK-NEXT: [[CMP110:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP110]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT]]
; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_011]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP0]], 3
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[I_011]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX4]], align 1
; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[I_011]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[INC]]
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
; CHECK-NEXT: [[ADD_1:%.*]] = add i8 [[TMP1]], 3
; CHECK-NEXT: [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INC]]
; CHECK-NEXT: store i8 [[ADD_1]], ptr [[ARRAYIDX4_1]], align 1
; CHECK-NEXT: [[INC_1]] = add nuw nsw i32 [[I_011]], 2
; CHECK-NEXT: [[CMP1_1:%.*]] = icmp slt i32 [[INC_1]], [[N]]
; CHECK-NEXT: br i1 [[CMP1_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
; Only continue towards the loop when bit 0 of %p is clear, i.e. %p is even.
%and = and i32 %p, 1
%cmp = icmp eq i32 %and, 0
br i1 %cmp, label %guarded, label %exit
guarded:
; Tell the optimizer, via llvm.assume, that %q is a multiple of 2.
%rem = urem i32 %q, 2
%cmp2 = icmp eq i32 %rem, 0
tail call void @llvm.assume(i1 %cmp2)
; %n = signed max(%p, %q) is the loop bound; both inputs are even on this path.
%gt = icmp sgt i32 %p, %q
%n = select i1 %gt, i32 %p, i32 %q
; Skip the loop entirely unless %n > 0.
%cmp110 = icmp sgt i32 %n, 0
br i1 %cmp110, label %for.body, label %exit
for.body:
; Byte-wise a[i] = b[i] + 3 for i = 0 .. %n-1.
%i.011 = phi i32 [ %inc, %for.body ], [ 0, %guarded ]
%arrayidx = getelementptr inbounds i8, ptr %b, i32 %i.011
%0 = load i8, ptr %arrayidx, align 1
%add = add i8 %0, 3
%arrayidx4 = getelementptr inbounds i8, ptr %a, i32 %i.011
store i8 %add, ptr %arrayidx4, align 1
%inc = add nuw nsw i32 %i.011, 1
%cmp1 = icmp slt i32 %inc, %n
br i1 %cmp1, label %for.body, label %exit
exit:
ret void
}
; Make sure the loop is unrolled with a remainder loop when the trip-count
; is not provably divisible by the unroll factor. Here the guard masks %p with
; 6, which only tests bits 1 and 2 and leaves bit 0 unconstrained, so %p (and
; therefore the signed maximum %n used as the trip count) may be odd.
define dso_local void @cannotProveDivisibleTC(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %p, i32 %q) local_unnamed_addr {
; CHECK-LABEL: @cannotProveDivisibleTC(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND:%.*]] = and i32 [[P:%.*]], 6
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[GUARDED:%.*]], label [[EXIT:%.*]]
; CHECK: guarded:
; CHECK-NEXT: [[REM:%.*]] = urem i32 [[Q:%.*]], 2
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[REM]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP2]])
; CHECK-NEXT: [[GT:%.*]] = icmp sgt i32 [[P]], [[Q]]
; CHECK-NEXT: [[N:%.*]] = select i1 [[GT]], i32 [[P]], i32 [[Q]]
; CHECK-NEXT: [[CMP110:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP110]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[N]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 1
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; CHECK: for.body.preheader.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[N]], [[XTRAITER]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_011]]
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP2]], 3
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[I_011]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX4]], align 1
; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[I_011]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[INC]]
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
; CHECK-NEXT: [[ADD_1:%.*]] = add i8 [[TMP3]], 3
; CHECK-NEXT: [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INC]]
; CHECK-NEXT: store i8 [[ADD_1]], ptr [[ARRAYIDX4_1]], align 1
; CHECK-NEXT: [[INC_1]] = add nuw nsw i32 [[I_011]], 2
; CHECK-NEXT: [[NITER_NEXT_1]] = add i32 [[NITER]], 2
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: exit.loopexit.unr-lcssa.loopexit:
; CHECK-NEXT: [[I_011_UNR_PH:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[EXIT_LOOPEXIT_UNR_LCSSA]]
; CHECK: exit.loopexit.unr-lcssa:
; CHECK-NEXT: [[I_011_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[I_011_UNR_PH]], [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: for.body.epil.preheader:
; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]]
; CHECK: for.body.epil:
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011_UNR]]
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_EPIL]], align 1
; CHECK-NEXT: [[ADD_EPIL:%.*]] = add i8 [[TMP4]], 3
; CHECK-NEXT: [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011_UNR]]
; CHECK-NEXT: store i8 [[ADD_EPIL]], ptr [[ARRAYIDX4_EPIL]], align 1
; CHECK-NEXT: br label [[EXIT_LOOPEXIT]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
; Mask 6 (0b110) checks bits 1 and 2 of %p only; bit 0 stays unknown, so this
; guard does not establish that %p is even.
%and = and i32 %p, 6
%cmp = icmp eq i32 %and, 0
br i1 %cmp, label %guarded, label %exit
guarded:
; %q is still assumed to be a multiple of 2, as in the divisible case.
%rem = urem i32 %q, 2
%cmp2 = icmp eq i32 %rem, 0
tail call void @llvm.assume(i1 %cmp2)
; %n = signed max(%p, %q); it can be odd whenever %p is selected.
%gt = icmp sgt i32 %p, %q
%n = select i1 %gt, i32 %p, i32 %q
; Skip the loop entirely unless %n > 0.
%cmp110 = icmp sgt i32 %n, 0
br i1 %cmp110, label %for.body, label %exit
for.body:
; Byte-wise a[i] = b[i] + 3 for i = 0 .. %n-1.
%i.011 = phi i32 [ %inc, %for.body ], [ 0, %guarded ]
%arrayidx = getelementptr inbounds i8, ptr %b, i32 %i.011
%0 = load i8, ptr %arrayidx, align 1
%add = add i8 %0, 3
%arrayidx4 = getelementptr inbounds i8, ptr %a, i32 %i.011
store i8 %add, ptr %arrayidx4, align 1
%inc = add nuw nsw i32 %i.011, 1
%cmp1 = icmp slt i32 %inc, %n
br i1 %cmp1, label %for.body, label %exit
exit:
ret void
}
; Declaration of the llvm.assume intrinsic; both test functions in this file
; call it with the condition (%q urem 2) == 0.
declare void @llvm.assume(i1 noundef) nofree nosync nounwind willreturn