; llvm/llvm/test/Transforms/LoopUnroll/X86/znver3.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -passes=loop-unroll -unroll-allow-partial < %s | FileCheck %s

target triple = "x86_64-unknown-linux-gnu"

; Constant-trip-count case for the znver3 target CPU: the loop below sums the
; 8192 i32 elements ary[0..8191] and returns the total.
; The autogenerated assertions expect the loop body to be replicated 16 times
; per iteration (induction step of 16), with a single exit comparison against
; 8192 kept at the end of the unrolled body.
define i32 @test(ptr %ary) "target-cpu"="znver3" {
; CHECK-LABEL: define i32 @test(
; CHECK-SAME: ptr [[ARY:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_127:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_127:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[SUM_NEXT:%.*]] = add nsw i32 [[VAL]], [[SUM]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT:    [[VAL_1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT:    [[SUM_NEXT_1:%.*]] = add nsw i32 [[VAL_1]], [[SUM_NEXT]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT:    [[VAL_2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
; CHECK-NEXT:    [[SUM_NEXT_2:%.*]] = add nsw i32 [[VAL_2]], [[SUM_NEXT_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT:    [[VAL_3:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
; CHECK-NEXT:    [[SUM_NEXT_3:%.*]] = add nsw i32 [[VAL_3]], [[SUM_NEXT_2]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT:    [[VAL_4:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
; CHECK-NEXT:    [[SUM_NEXT_4:%.*]] = add nsw i32 [[VAL_4]], [[SUM_NEXT_3]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT:    [[VAL_5:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
; CHECK-NEXT:    [[SUM_NEXT_5:%.*]] = add nsw i32 [[VAL_5]], [[SUM_NEXT_4]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT:    [[VAL_6:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
; CHECK-NEXT:    [[SUM_NEXT_6:%.*]] = add nsw i32 [[VAL_6]], [[SUM_NEXT_5]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT:    [[VAL_7:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
; CHECK-NEXT:    [[SUM_NEXT_7:%.*]] = add nsw i32 [[VAL_7]], [[SUM_NEXT_6]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_7:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_7]]
; CHECK-NEXT:    [[VAL_8:%.*]] = load i32, ptr [[ARRAYIDX_8]], align 4
; CHECK-NEXT:    [[SUM_NEXT_8:%.*]] = add nsw i32 [[VAL_8]], [[SUM_NEXT_7]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_8:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 9
; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_8]]
; CHECK-NEXT:    [[VAL_9:%.*]] = load i32, ptr [[ARRAYIDX_9]], align 4
; CHECK-NEXT:    [[SUM_NEXT_9:%.*]] = add nsw i32 [[VAL_9]], [[SUM_NEXT_8]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_9:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 10
; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_9]]
; CHECK-NEXT:    [[VAL_10:%.*]] = load i32, ptr [[ARRAYIDX_10]], align 4
; CHECK-NEXT:    [[SUM_NEXT_10:%.*]] = add nsw i32 [[VAL_10]], [[SUM_NEXT_9]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_10:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 11
; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_10]]
; CHECK-NEXT:    [[VAL_11:%.*]] = load i32, ptr [[ARRAYIDX_11]], align 4
; CHECK-NEXT:    [[SUM_NEXT_11:%.*]] = add nsw i32 [[VAL_11]], [[SUM_NEXT_10]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_11:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 12
; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_11]]
; CHECK-NEXT:    [[VAL_12:%.*]] = load i32, ptr [[ARRAYIDX_12]], align 4
; CHECK-NEXT:    [[SUM_NEXT_12:%.*]] = add nsw i32 [[VAL_12]], [[SUM_NEXT_11]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_12:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 13
; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_12]]
; CHECK-NEXT:    [[VAL_13:%.*]] = load i32, ptr [[ARRAYIDX_13]], align 4
; CHECK-NEXT:    [[SUM_NEXT_13:%.*]] = add nsw i32 [[VAL_13]], [[SUM_NEXT_12]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_13:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 14
; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_13]]
; CHECK-NEXT:    [[VAL_14:%.*]] = load i32, ptr [[ARRAYIDX_14]], align 4
; CHECK-NEXT:    [[SUM_NEXT_14:%.*]] = add nsw i32 [[VAL_14]], [[SUM_NEXT_13]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_14:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 15
; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_14]]
; CHECK-NEXT:    [[VAL_15:%.*]] = load i32, ptr [[ARRAYIDX_15]], align 4
; CHECK-NEXT:    [[SUM_NEXT_127]] = add nsw i32 [[VAL_15]], [[SUM_NEXT_14]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_127]] = add nuw nsw i64 [[INDVARS_IV]], 16
; CHECK-NEXT:    [[EXITCOND_NOT_127:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_127]], 8192
; CHECK-NEXT:    br i1 [[EXITCOND_NOT_127]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT_127]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[SUM_NEXT_LCSSA]]
;
entry:
  br label %for.body

for.body:
  ; Loop-carried state: the element index and the running sum, both starting at 0.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.body ]
  ; Load ary[indvars.iv] and fold it into the running sum.
  %arrayidx = getelementptr inbounds i32, ptr %ary, i64 %indvars.iv
  %val = load i32, ptr %arrayidx, align 4
  %sum.next = add nsw i32 %val, %sum
  ; Advance the index; the loop exits once the next index equals the constant bound 8192.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 8192
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ; Return the sum produced by the final iteration.
  ret i32 %sum.next
}

; Runtime-trip-count case for the znver3 target CPU: the loop bound is the
; argument %n, and each iteration loads one i32 element and pushes it through a
; chain of 50 dependent squarings (dummy1..dummy50) before adding it to the sum.
; The autogenerated assertions expect the loop to come out with a single copy of
; the body (one load, 50 multiplies, induction step of 1), i.e. not unrolled.
; The "_EPIL" suffixes and the shuffled DUMMYnn numbering are only FileCheck
; capture names; they do not imply an epilogue loop in the expected output.
; NOTE(review): presumably the body size and/or the unknown trip count is what
; keeps this loop from being unrolled with the options on the run line - confirm
; against the znver3 unrolling preferences.
define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
; CHECK-LABEL: define i32 @test2(
; CHECK-SAME: ptr [[ARY:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_EPIL:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_EPIL:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_EPIL]]
; CHECK-NEXT:    [[VAL_EPIL:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
; CHECK-NEXT:    [[DUMMY21:%.*]] = mul i32 [[VAL_EPIL]], [[VAL_EPIL]]
; CHECK-NEXT:    [[DUMMY27:%.*]] = mul i32 [[DUMMY21]], [[DUMMY21]]
; CHECK-NEXT:    [[DUMMY28:%.*]] = mul i32 [[DUMMY27]], [[DUMMY27]]
; CHECK-NEXT:    [[DUMMY4:%.*]] = mul i32 [[DUMMY28]], [[DUMMY28]]
; CHECK-NEXT:    [[DUMMY5:%.*]] = mul i32 [[DUMMY4]], [[DUMMY4]]
; CHECK-NEXT:    [[DUMMY6:%.*]] = mul i32 [[DUMMY5]], [[DUMMY5]]
; CHECK-NEXT:    [[DUMMY7:%.*]] = mul i32 [[DUMMY6]], [[DUMMY6]]
; CHECK-NEXT:    [[DUMMY8:%.*]] = mul i32 [[DUMMY7]], [[DUMMY7]]
; CHECK-NEXT:    [[DUMMY9:%.*]] = mul i32 [[DUMMY8]], [[DUMMY8]]
; CHECK-NEXT:    [[DUMMY10:%.*]] = mul i32 [[DUMMY9]], [[DUMMY9]]
; CHECK-NEXT:    [[DUMMY29:%.*]] = mul i32 [[DUMMY10]], [[DUMMY10]]
; CHECK-NEXT:    [[DUMMY30:%.*]] = mul i32 [[DUMMY29]], [[DUMMY29]]
; CHECK-NEXT:    [[DUMMY39:%.*]] = mul i32 [[DUMMY30]], [[DUMMY30]]
; CHECK-NEXT:    [[DUMMY40:%.*]] = mul i32 [[DUMMY39]], [[DUMMY39]]
; CHECK-NEXT:    [[DUMMY15:%.*]] = mul i32 [[DUMMY40]], [[DUMMY40]]
; CHECK-NEXT:    [[DUMMY16:%.*]] = mul i32 [[DUMMY15]], [[DUMMY15]]
; CHECK-NEXT:    [[DUMMY17:%.*]] = mul i32 [[DUMMY16]], [[DUMMY16]]
; CHECK-NEXT:    [[DUMMY18:%.*]] = mul i32 [[DUMMY17]], [[DUMMY17]]
; CHECK-NEXT:    [[DUMMY19:%.*]] = mul i32 [[DUMMY18]], [[DUMMY18]]
; CHECK-NEXT:    [[DUMMY20:%.*]] = mul i32 [[DUMMY19]], [[DUMMY19]]
; CHECK-NEXT:    [[VAL:%.*]] = mul i32 [[DUMMY20]], [[DUMMY20]]
; CHECK-NEXT:    [[DUMMY1:%.*]] = mul i32 [[VAL]], [[VAL]]
; CHECK-NEXT:    [[DUMMY2:%.*]] = mul i32 [[DUMMY1]], [[DUMMY1]]
; CHECK-NEXT:    [[DUMMY3:%.*]] = mul i32 [[DUMMY2]], [[DUMMY2]]
; CHECK-NEXT:    [[DUMMY41:%.*]] = mul i32 [[DUMMY3]], [[DUMMY3]]
; CHECK-NEXT:    [[DUMMY26:%.*]] = mul i32 [[DUMMY41]], [[DUMMY41]]
; CHECK-NEXT:    [[DUMMY11:%.*]] = mul i32 [[DUMMY26]], [[DUMMY26]]
; CHECK-NEXT:    [[DUMMY12:%.*]] = mul i32 [[DUMMY11]], [[DUMMY11]]
; CHECK-NEXT:    [[DUMMY13:%.*]] = mul i32 [[DUMMY12]], [[DUMMY12]]
; CHECK-NEXT:    [[DUMMY14:%.*]] = mul i32 [[DUMMY13]], [[DUMMY13]]
; CHECK-NEXT:    [[DUMMY31:%.*]] = mul i32 [[DUMMY14]], [[DUMMY14]]
; CHECK-NEXT:    [[DUMMY32:%.*]] = mul i32 [[DUMMY31]], [[DUMMY31]]
; CHECK-NEXT:    [[DUMMY22:%.*]] = mul i32 [[DUMMY32]], [[DUMMY32]]
; CHECK-NEXT:    [[DUMMY23:%.*]] = mul i32 [[DUMMY22]], [[DUMMY22]]
; CHECK-NEXT:    [[DUMMY24:%.*]] = mul i32 [[DUMMY23]], [[DUMMY23]]
; CHECK-NEXT:    [[DUMMY25:%.*]] = mul i32 [[DUMMY24]], [[DUMMY24]]
; CHECK-NEXT:    [[DUMMY37:%.*]] = mul i32 [[DUMMY25]], [[DUMMY25]]
; CHECK-NEXT:    [[DUMMY38:%.*]] = mul i32 [[DUMMY37]], [[DUMMY37]]
; CHECK-NEXT:    [[DUMMY33:%.*]] = mul i32 [[DUMMY38]], [[DUMMY38]]
; CHECK-NEXT:    [[DUMMY34:%.*]] = mul i32 [[DUMMY33]], [[DUMMY33]]
; CHECK-NEXT:    [[DUMMY35:%.*]] = mul i32 [[DUMMY34]], [[DUMMY34]]
; CHECK-NEXT:    [[DUMMY36:%.*]] = mul i32 [[DUMMY35]], [[DUMMY35]]
; CHECK-NEXT:    [[DUMMY43:%.*]] = mul i32 [[DUMMY36]], [[DUMMY36]]
; CHECK-NEXT:    [[DUMMY44:%.*]] = mul i32 [[DUMMY43]], [[DUMMY43]]
; CHECK-NEXT:    [[DUMMY45:%.*]] = mul i32 [[DUMMY44]], [[DUMMY44]]
; CHECK-NEXT:    [[DUMMY46:%.*]] = mul i32 [[DUMMY45]], [[DUMMY45]]
; CHECK-NEXT:    [[DUMMY47:%.*]] = mul i32 [[DUMMY46]], [[DUMMY46]]
; CHECK-NEXT:    [[DUMMY48:%.*]] = mul i32 [[DUMMY47]], [[DUMMY47]]
; CHECK-NEXT:    [[DUMMY49:%.*]] = mul i32 [[DUMMY48]], [[DUMMY48]]
; CHECK-NEXT:    [[DUMMY50_EPIL:%.*]] = mul i32 [[DUMMY49]], [[DUMMY49]]
; CHECK-NEXT:    [[SUM_NEXT_EPIL]] = add nsw i32 [[DUMMY50_EPIL]], [[SUM_EPIL]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
; CHECK-NEXT:    [[EXITCOND_NOT_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT_EPIL]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT_EPIL]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[SUM_NEXT_LCSSA]]
;
entry:
  br label %for.body

for.body:
  ; Loop-carried state: the element index and the running sum, both starting at 0.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %ary, i64 %indvars.iv
  %val = load i32, ptr %arrayidx, align 4
  ; Filler work: 50 multiplies, each squaring the previous result, so the loop
  ; body is much larger than a plain load-and-add reduction.
  %dummy1 = mul i32 %val, %val
  %dummy2 = mul i32 %dummy1, %dummy1
  %dummy3 = mul i32 %dummy2, %dummy2
  %dummy4 = mul i32 %dummy3, %dummy3
  %dummy5 = mul i32 %dummy4, %dummy4
  %dummy6 = mul i32 %dummy5, %dummy5
  %dummy7 = mul i32 %dummy6, %dummy6
  %dummy8 = mul i32 %dummy7, %dummy7
  %dummy9 = mul i32 %dummy8, %dummy8
  %dummy10 = mul i32 %dummy9, %dummy9
  %dummy11 = mul i32 %dummy10, %dummy10
  %dummy12 = mul i32 %dummy11, %dummy11
  %dummy13 = mul i32 %dummy12, %dummy12
  %dummy14 = mul i32 %dummy13, %dummy13
  %dummy15 = mul i32 %dummy14, %dummy14
  %dummy16 = mul i32 %dummy15, %dummy15
  %dummy17 = mul i32 %dummy16, %dummy16
  %dummy18 = mul i32 %dummy17, %dummy17
  %dummy19 = mul i32 %dummy18, %dummy18
  %dummy20 = mul i32 %dummy19, %dummy19
  %dummy21 = mul i32 %dummy20, %dummy20
  %dummy22 = mul i32 %dummy21, %dummy21
  %dummy23 = mul i32 %dummy22, %dummy22
  %dummy24 = mul i32 %dummy23, %dummy23
  %dummy25 = mul i32 %dummy24, %dummy24
  %dummy26 = mul i32 %dummy25, %dummy25
  %dummy27 = mul i32 %dummy26, %dummy26
  %dummy28 = mul i32 %dummy27, %dummy27
  %dummy29 = mul i32 %dummy28, %dummy28
  %dummy30 = mul i32 %dummy29, %dummy29
  %dummy31 = mul i32 %dummy30, %dummy30
  %dummy32 = mul i32 %dummy31, %dummy31
  %dummy33 = mul i32 %dummy32, %dummy32
  %dummy34 = mul i32 %dummy33, %dummy33
  %dummy35 = mul i32 %dummy34, %dummy34
  %dummy36 = mul i32 %dummy35, %dummy35
  %dummy37 = mul i32 %dummy36, %dummy36
  %dummy38 = mul i32 %dummy37, %dummy37
  %dummy39 = mul i32 %dummy38, %dummy38
  %dummy40 = mul i32 %dummy39, %dummy39
  %dummy41 = mul i32 %dummy40, %dummy40
  %dummy42 = mul i32 %dummy41, %dummy41
  %dummy43 = mul i32 %dummy42, %dummy42
  %dummy44 = mul i32 %dummy43, %dummy43
  %dummy45 = mul i32 %dummy44, %dummy44
  %dummy46 = mul i32 %dummy45, %dummy45
  %dummy47 = mul i32 %dummy46, %dummy46
  %dummy48 = mul i32 %dummy47, %dummy47
  %dummy49 = mul i32 %dummy48, %dummy48
  %dummy50 = mul i32 %dummy49, %dummy49
  ; Only the last value of the chain feeds the reduction.
  %sum.next = add nsw i32 %dummy50, %sum
  ; Advance the index; the loop exits once the next index equals the runtime bound %n.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ; Return the sum produced by the final iteration.
  ret i32 %sum.next
}