; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='default<O2>' -enable-matrix -S %s | FileCheck %s
; Data layout string and target triple: the module is compiled for the
; "systemz" target.
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
target triple = "systemz"
; Module-private array of 100 zero-initialized i32 values. @foo passes it to
; @populate and then reads it backwards starting from element 99.
@ARR = internal global [100 x i32] zeroinitializer, align 4
; This test uses 'sub' instructions for gep offsets to allow
; codegen (LSR) to create optimal asm. If 'sub' is canonicalized
; to 'xor', then the backend needs to be able to see through
; that transform to produce optimal asm.
; @foo: calls @populate on @ARR, then returns the sum of j * ARR[99 - i] over
; j = 1..3 (outer loop) and i = 0..31 (inner loop). The %a argument is not
; used by the body.
define dso_local zeroext i32 @foo(ptr noundef %a) #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: tail call void @populate(ptr noundef nonnull @ARR) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
; CHECK: for.body4:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY4]] ]
; CHECK-NEXT: [[SUM_11:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7:%.*]], [[FOR_BODY4]] ]
; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i64 0, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[IDX_NEG]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !tbaa [[TBAA3:![0-9]+]]
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[SUM_11]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_NEG:%.*]] = xor i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[ADD_PTR_110:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_NEG]]
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_110]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[ADD_111:%.*]] = add i32 [[TMP1]], [[ADD]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_112_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_217:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_112_NEG]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ADD_PTR_217]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[ADD_218:%.*]] = add i32 [[TMP2]], [[ADD_111]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_219_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_219_NEG]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ADD_PTR_3]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[TMP3]], [[ADD_218]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_3_NEG]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ADD_PTR_4]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[TMP4]], [[ADD_3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_4_NEG]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ADD_PTR_5]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[ADD_5:%.*]] = add i32 [[TMP5]], [[ADD_4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_5_NEG]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_6]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[ADD_6:%.*]] = add i32 [[TMP6]], [[ADD_5]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV]]
; CHECK-NEXT: [[ADD_PTR_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_6_NEG]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADD_PTR_7]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[ADD_7]] = add i32 [[TMP7]], [[ADD_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], 32
; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_BODY4_1:%.*]], label [[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: for.body4.1:
; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1_7:%.*]], [[FOR_BODY4_1]] ], [ 0, [[FOR_BODY4]] ]
; CHECK-NEXT: [[SUM_11_1:%.*]] = phi i32 [ [[ADD_1_7:%.*]], [[FOR_BODY4_1]] ], [ [[ADD_7]], [[FOR_BODY4]] ]
; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i64 0, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[IDX_NEG_1]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ADD_PTR_1]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_NEG:%.*]] = xor i64 [[INDVARS_IV_1]], -1
; CHECK-NEXT: [[ADD_PTR_1_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_NEG]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_1_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_1_NEG]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_2_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_2_NEG]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_3_NEG]]
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP15]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_4_NEG]]
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_5_NEG]]
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP18]], [[TMP19]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]]
; CHECK-NEXT: [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_1_6_NEG]]
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 1
; CHECK-NEXT: [[ADD_1_7]] = add i32 [[TMP23]], [[SUM_11_1]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 [[INDVARS_IV_1]], 8
; CHECK-NEXT: [[EXITCOND_1_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1_7]], 32
; CHECK-NEXT: br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
; CHECK: for.body4.2:
; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2_7:%.*]], [[FOR_BODY4_2]] ], [ 0, [[FOR_BODY4_1]] ]
; CHECK-NEXT: [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ], [ [[ADD_1_7]], [[FOR_BODY4_1]] ]
; CHECK-NEXT: [[IDX_NEG_2:%.*]] = sub nsw i64 0, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[IDX_NEG_2]]
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP24]], 3
; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], [[SUM_11_2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_NEG:%.*]] = xor i64 [[INDVARS_IV_2]], -1
; CHECK-NEXT: [[ADD_PTR_2_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_NEG]]
; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[MUL_2_1:%.*]] = mul i32 [[TMP25]], 3
; CHECK-NEXT: [[ADD_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD_2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_1_NEG:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_1_NEG]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[MUL_2_2:%.*]] = mul i32 [[TMP26]], 3
; CHECK-NEXT: [[ADD_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD_2_1]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_2_NEG:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_2_NEG]]
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[MUL_2_3:%.*]] = mul i32 [[TMP27]], 3
; CHECK-NEXT: [[ADD_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD_2_2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_3_NEG:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_3_NEG]]
; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[MUL_2_4:%.*]] = mul i32 [[TMP28]], 3
; CHECK-NEXT: [[ADD_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD_2_3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_4_NEG:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_4_NEG]]
; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[MUL_2_5:%.*]] = mul i32 [[TMP29]], 3
; CHECK-NEXT: [[ADD_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD_2_4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_5_NEG:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_5_NEG]]
; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[MUL_2_6:%.*]] = mul i32 [[TMP30]], 3
; CHECK-NEXT: [[ADD_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD_2_5]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_6_NEG:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_2]]
; CHECK-NEXT: [[ADD_PTR_2_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds (i8, ptr @ARR, i64 396), i64 [[INDVARS_IV_NEXT_2_6_NEG]]
; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[MUL_2_7:%.*]] = mul i32 [[TMP31]], 3
; CHECK-NEXT: [[ADD_2_7]] = add i32 [[MUL_2_7]], [[ADD_2_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_7]] = add nuw nsw i64 [[INDVARS_IV_2]], 8
; CHECK-NEXT: [[EXITCOND_2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2_7]], 32
; CHECK-NEXT: br i1 [[EXITCOND_2_NOT_7]], label [[FOR_INC5_2:%.*]], label [[FOR_BODY4_2]], !llvm.loop [[LOOP7]]
; CHECK: for.inc5.2:
; CHECK-NEXT: ret i32 [[ADD_2_7]]
;
; Summary of the expected output pinned by the assertions above: the outer loop
; is gone and three copies of the inner loop remain (for.body4, for.body4.1,
; for.body4.2), each stepping its induction variable by 8 until it reaches 32.
; The factor j shows up as no multiply in the first copy, a single 'shl ..., 1'
; of the 8-load partial sum in the second, and 'mul ..., 3' per load in the
; third. Offsets -2..-7 are expected as 'sub' from a negative constant, while
; the -1 offset is expected as 'xor ..., -1'.
;
; Input IR follows (unoptimized loop nest).
entry:
; Hand @ARR to the external @populate before any element is read.
call void @populate(ptr noundef @ARR)
br label %for.cond
; Outer loop header: %j.0 starts at 1 and the loop runs while %j.0 < 4;
; %sum.0 is the running total carried across outer iterations.
for.cond: ; preds = %for.inc5, %entry
%j.0 = phi i32 [ 1, %entry ], [ %inc6, %for.inc5 ]
%sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.inc5 ]
%cmp = icmp slt i32 %j.0, 4
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
br label %for.end7
for.body: ; preds = %for.cond
br label %for.cond1
; Inner loop header: %i.0 starts at 0 and the loop runs while %i.0 < 32;
; %sum.1 continues from the outer total %sum.0.
for.cond1: ; preds = %for.inc, %for.body
%sum.1 = phi i32 [ %sum.0, %for.body ], [ %add, %for.inc ]
%i.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%cmp2 = icmp slt i32 %i.0, 32
br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
for.cond.cleanup3: ; preds = %for.cond1
br label %for.end
; Inner loop body: sum += j * ARR[99 - i].
for.body4: ; preds = %for.cond1
%idx.ext = sext i32 %i.0 to i64
; The negated index is written as 'sub 0, x'; this is the instruction the
; test is about.
%idx.neg = sub i64 0, %idx.ext
; Base pointer is &ARR[99]; adding the negated index walks the array backwards.
%add.ptr = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 %idx.neg
%0 = load i32, ptr %add.ptr, align 4, !tbaa !3
%mul = mul i32 %j.0, %0
%add = add i32 %sum.1, %mul
br label %for.inc
for.inc: ; preds = %for.body4
%inc = add nsw i32 %i.0, 1
; Inner-loop back edge, tagged with loop metadata !7.
br label %for.cond1, !llvm.loop !7
for.end: ; preds = %for.cond.cleanup3
br label %for.inc5
for.inc5: ; preds = %for.end
%inc6 = add nsw i32 %j.0, 1
; Outer-loop back edge, tagged with loop metadata !9.
br label %for.cond, !llvm.loop !9
for.end7: ; preds = %for.cond.cleanup
; Return the total accumulated over both loops.
ret i32 %sum.0
}
; External routine that @foo calls with @ARR before reading it; only the
; declaration is present in this module.
declare dso_local void @populate(ptr noundef) #1
; Attribute group #0 is attached to @foo and #1 to @populate; both select
; target-cpu "z10".
attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="z10" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="z10" }
; NOTE(review): group #2 is not referenced by any function or call site in the
; visible IR; it looks like a leftover from test reduction - confirm before
; removing.
attributes #2 = { argmemonly nocallback nofree nosync nounwind willreturn }
; Module flags (!0, !1) and producer identification (!2).
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 2}
!2 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git 0bfef0669075f229fd325d8c8521c9adfb453f83)"}
; TBAA chain for the i32 load in @foo: access tag !3 -> "int" -> "omnipotent
; char" -> root "Simple C/C++ TBAA".
!3 = !{!4, !4, i64 0}
!4 = !{!"int", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
; Loop IDs: !7 is attached to the inner-loop back edge and !9 to the outer-loop
; back edge; both carry the llvm.loop.mustprogress property (!8).
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.mustprogress"}
!9 = distinct !{!9, !8}