; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -S %s | FileCheck %s
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
define i32 @iv_used_widened_and_truncated(ptr %dst, i64 %N) #0 {
; CHECK-LABEL: define i32 @iv_used_widened_and_truncated(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <8 x i32> [[VEC_IND4]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <8 x i32> [[STEP_ADD5]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <8 x i32> [[STEP_ADD6]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD2]]
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VEC_IND4]], <8 x ptr> [[TMP1]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD5]], <8 x ptr> [[TMP2]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD6]], <8 x ptr> [[TMP3]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD7]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <8 x i32> [[STEP_ADD7]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: [[T:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: store i32 [[T]], ptr [[GEP]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[C]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret i32 0
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep = getelementptr { i32, [ 8 x i32 ]}, ptr %dst, i64 %iv
%t = trunc i64 %iv to i32
store i32 %t, ptr %gep, align 8
%iv.next = add i64 %iv, 1
%c = icmp eq i64 %iv, %N
br i1 %c, label %exit, label %loop
exit:
ret i32 0
}
define void @multiple_truncated_ivs_with_wide_uses(i1 %c, ptr %A, ptr %B) {
; CHECK-LABEL: define void @multiple_truncated_ivs_with_wide_uses(
; CHECK-SAME: i1 [[C:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 130
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B]], i64 260
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> <i16 10, i16 10, i16 10, i16 10>
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> <i16 10, i16 10, i16 10, i16 10>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4
; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP6]], align 2, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP7]], align 2, !alias.scope [[META4]], !noalias [[META7]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4
; CHECK-NEXT: store <4 x i32> [[VEC_IND3]], ptr [[TMP10]], align 4, !alias.scope [[META7]]
; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP11]], align 4, !alias.scope [[META7]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_16:%.*]] = trunc i64 [[IV]] to i16
; CHECK-NEXT: [[IV_32:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i16 [[IV_16]], i16 10
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i16, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: store i16 [[SEL]], ptr [[GEP_A]], align 2
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: store i32 [[IV_32]], ptr [[GEP_B]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 64
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%iv.16 = trunc i64 %iv to i16
%iv.32 = trunc i64 %iv to i32
%sel = select i1 %c, i16 %iv.16, i16 10
%gep.A = getelementptr i16, ptr %A, i64 %iv
store i16 %sel, ptr %gep.A
%gep.B = getelementptr i32, ptr %B, i64 %iv
store i32 %iv.32, ptr %gep.B
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv, 64
br i1 %ec, label %exit, label %loop
exit:
ret void
}
define void @truncated_ivs_with_wide_and_scalar_uses(i1 %c, ptr %dst) {
; CHECK-LABEL: define void @truncated_ivs_with_wide_and_scalar_uses(
; CHECK-SAME: i1 [[C:%.*]], ptr [[DST:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i16> [[VEC_IND]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i32 8
; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2
; CHECK-NEXT: store <8 x i16> [[TMP6]], ptr [[TMP8]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[STEP_ADD]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_16:%.*]] = trunc i64 [[IV]] to i16
; CHECK-NEXT: [[IV_32:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i32 [[IV_32]]
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i16 [[IV_16]], i16 10
; CHECK-NEXT: store i16 [[SEL]], ptr [[GEP]], align 2
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 64
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%iv.16 = trunc i64 %iv to i16
%iv.32 = trunc i64 %iv to i32
%gep = getelementptr i16, ptr %dst, i32 %iv.32
%sel = select i1 %c, i16 %iv.16, i16 10
store i16 %sel, ptr %gep
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv, 64
br i1 %ec, label %exit, label %loop
exit:
ret void
}
define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
; CHECK-LABEL: define void @multiple_pointer_ivs_with_scalar_uses_only(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 8589934391
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B]], i64 4294967196
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A]], i64 8589934368
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184
; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2048>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]]
; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP10]]
; CHECK-NEXT: [[NEXT_GEP17:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[NEXT_GEP18:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]]
; CHECK-NEXT: [[NEXT_GEP19:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP13]]
; CHECK-NEXT: [[NEXT_GEP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP14]]
; CHECK-NEXT: [[NEXT_GEP21:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[NEXT_GEP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[NEXT_GEP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP23]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP18]], align 1, !alias.scope [[META13:![0-9]+]]
; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[NEXT_GEP22]], i32 0
; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP20]], align 1, !alias.scope [[META13]]
; CHECK-NEXT: [[TMP21:%.*]] = zext <16 x i8> [[WIDE_LOAD24]] to <16 x i32>
; CHECK-NEXT: [[TMP22]] = add <16 x i32> [[TMP19]], [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP22]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
; CHECK-NEXT: [[TMP24:%.*]] = lshr <16 x i32> [[TMP23]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP25:%.*]] = trunc <16 x i32> [[TMP24]] to <16 x i8>
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP25]], i32 0
; CHECK-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP]], align 1, !alias.scope [[META16:![0-9]+]], !noalias [[META13]]
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i8> [[TMP25]], i32 1
; CHECK-NEXT: store i8 [[TMP27]], ptr [[NEXT_GEP7]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP25]], i32 2
; CHECK-NEXT: store i8 [[TMP28]], ptr [[NEXT_GEP8]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i8> [[TMP25]], i32 3
; CHECK-NEXT: store i8 [[TMP29]], ptr [[NEXT_GEP9]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP25]], i32 4
; CHECK-NEXT: store i8 [[TMP30]], ptr [[NEXT_GEP10]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i8> [[TMP25]], i32 5
; CHECK-NEXT: store i8 [[TMP31]], ptr [[NEXT_GEP11]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP25]], i32 6
; CHECK-NEXT: store i8 [[TMP32]], ptr [[NEXT_GEP12]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i8> [[TMP25]], i32 7
; CHECK-NEXT: store i8 [[TMP33]], ptr [[NEXT_GEP13]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP25]], i32 8
; CHECK-NEXT: store i8 [[TMP34]], ptr [[NEXT_GEP14]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i8> [[TMP25]], i32 9
; CHECK-NEXT: store i8 [[TMP35]], ptr [[NEXT_GEP15]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[TMP25]], i32 10
; CHECK-NEXT: store i8 [[TMP36]], ptr [[NEXT_GEP16]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i8> [[TMP25]], i32 11
; CHECK-NEXT: store i8 [[TMP37]], ptr [[NEXT_GEP17]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP25]], i32 12
; CHECK-NEXT: store i8 [[TMP38]], ptr [[NEXT_GEP18]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i8> [[TMP25]], i32 13
; CHECK-NEXT: store i8 [[TMP39]], ptr [[NEXT_GEP19]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP25]], i32 14
; CHECK-NEXT: store i8 [[TMP40]], ptr [[NEXT_GEP20]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i8> [[TMP25]], i32 15
; CHECK-NEXT: store i8 [[TMP41]], ptr [[NEXT_GEP21]], align 1, !alias.scope [[META16]], !noalias [[META13]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967184
; CHECK-NEXT: br i1 [[TMP42]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP22]], i32 15
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY:%.*]] ], [ 100, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ], [ [[A]], [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[VECTOR_MEMCHECK]] ], [ 2048, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADD38:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[OUTPTR_0:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[INCDEC_PTR36:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ], [ [[INCDEC_PTR33:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[INCDEC_PTR33]] = getelementptr i8, ptr [[PTR_IV_3]], i64 1
; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[PTR_IV_3]], align 1
; CHECK-NEXT: [[CONV34:%.*]] = zext i8 [[TMP43]] to i32
; CHECK-NEXT: [[INCDEC_PTR36]] = getelementptr i8, ptr [[PTR_IV_2]], i64 1
; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[PTR_IV_2]], align 1
; CHECK-NEXT: [[CONV37:%.*]] = zext i8 [[TMP44]] to i32
; CHECK-NEXT: [[ADD38]] = add i32 [[CONV34]], [[CONV37]]
; CHECK-NEXT: [[SHR42:%.*]] = lshr i32 [[SCALAR_RECUR]], 1
; CHECK-NEXT: [[CONV43:%.*]] = trunc i32 [[SHR42]] to i8
; CHECK-NEXT: store i8 [[CONV43]], ptr [[PTR_IV_1]], align 1
; CHECK-NEXT: [[DEC]] = add i32 [[IV_1]], 1
; CHECK-NEXT: [[OUTPTR_0]] = getelementptr i8, ptr [[PTR_IV_1]], i64 2
; CHECK-NEXT: [[CMP30_NOT:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT: br i1 [[CMP30_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv.1 = phi i32 [ 100, %entry ], [ %dec, %loop ]
%iv.2 = phi i32 [ 2048, %entry ], [ %add38, %loop ]
%ptr.iv.1 = phi ptr [ %A, %entry ], [ %outptr.0, %loop ]
%ptr.iv.2 = phi ptr [ %B, %entry ], [ %incdec.ptr36, %loop ]
%ptr.iv.3 = phi ptr [ %B, %entry ], [ %incdec.ptr33, %loop ]
%incdec.ptr33 = getelementptr i8, ptr %ptr.iv.3, i64 1
%0 = load i8, ptr %ptr.iv.3, align 1
%conv34 = zext i8 %0 to i32
%incdec.ptr36 = getelementptr i8, ptr %ptr.iv.2, i64 1
%1 = load i8, ptr %ptr.iv.2, align 1
%conv37 = zext i8 %1 to i32
%add38 = add i32 %conv34, %conv37
%shr42 = lshr i32 %iv.2, 1
%conv43 = trunc i32 %shr42 to i8
store i8 %conv43, ptr %ptr.iv.1, align 1
%dec = add i32 %iv.1, 1
%outptr.0 = getelementptr i8, ptr %ptr.iv.1, i64 2
%cmp30.not = icmp eq i32 %dec, 0
br i1 %cmp30.not, label %exit, label %loop
exit:
ret void
}
define i16 @iv_and_step_trunc() {
; CHECK-LABEL: define i16 @iv_and_step_trunc() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i16> [ <i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], <i64 1, i64 1>
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i16>
; CHECK-NEXT: [[TMP2]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2>
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[IV]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[IV_NEXT]] to i16
; CHECK-NEXT: [[REC_NEXT]] = mul i16 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i16 [[REC_LCSSA]]
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rec = phi i16 [ 0, %entry ], [ %rec.next, %loop ]
%iv.next = add i64 %iv, 1
%0 = trunc i64 %iv to i16
%1 = trunc i64 %iv.next to i16
%rec.next = mul i16 %0, %1
%ec = icmp eq i64 %iv, 1
br i1 %ec, label %exit, label %loop
exit:
ret i16 %rec
}
define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 {
; CHECK-LABEL: define i32 @test_scalar_predicated_cost(
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[Y]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i64> poison, i64 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT4]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <8 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <8 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[VEC_IND]]
; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD]]
; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD1]]
; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD2]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i64> [[TMP12]] to <8 x i32>
; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i64> [[TMP13]] to <8 x i32>
; CHECK-NEXT: [[TMP22:%.*]] = trunc <8 x i64> [[TMP14]] to <8 x i32>
; CHECK-NEXT: [[TMP23:%.*]] = trunc <8 x i64> [[TMP15]] to <8 x i32>
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i32 8
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i32 16
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i32 24
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr [[TMP24]], i32 4, <8 x i1> [[TMP8]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP21]], ptr [[TMP25]], i32 4, <8 x i1> [[TMP9]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP10]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP23]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP11]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[CMP9_NOT:%.*]] = icmp ule i64 [[IV]], [[Y]]
; CHECK-NEXT: br i1 [[CMP9_NOT]], label [[LOOP_LATCH]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[OR:%.*]] = or i64 [[X]], [[IV]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[T:%.*]] = trunc i64 [[OR]] to i32
; CHECK-NEXT: store i32 [[T]], ptr [[GEP]], align 4
; CHECK-NEXT: br label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 100
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret i32 0
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%cmp9.not = icmp ule i64 %iv, %y
br i1 %cmp9.not, label %loop.latch, label %if.then
if.then:
%or = or i64 %x, %iv
%gep = getelementptr i32, ptr %A, i64 %iv
%t = trunc i64 %or to i32
store i32 %t, ptr %gep, align 4
br label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv, 100
br i1 %ec, label %exit, label %loop.header
exit:
ret i32 0
}
define void @wide_iv_trunc(ptr %dst, i64 %N) {
; CHECK-LABEL: define void @wide_iv_trunc(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[N]], 8
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 0
; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK: pred.store.if1:
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: store i32 [[TMP6]], ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP1]], 2
; CHECK-NEXT: store i32 [[TMP8]], ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP1]], 3
; CHECK-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: store i32 [[IV_TRUNC]], ptr [[DST]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%cmp = icmp ult i64 %N, 8
br i1 %cmp, label %loop, label %exit
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%iv.trunc = trunc i64 %iv to i32
store i32 %iv.trunc, ptr %dst, align 4
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv, %N
br i1 %ec, label %exit, label %loop
exit:
ret void
}
define void @wide_iv_trunc_reuse(ptr %dst) {
; CHECK-LABEL: define void @wide_iv_trunc_reuse(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
; CHECK-NEXT: store i32 [[IV_2]], ptr [[DST]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
%iv.2 = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ]
store i32 %iv.2, ptr %dst, align 4
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv, 0
%iv.trunc = trunc i64 %iv to i32
br i1 %ec, label %exit, label %loop
exit:
ret void
}
define void @wombat(i32 %arg, ptr %dst) #1 {
entry:
%mul = mul i32 %arg, 3
%zext = zext i32 %arg to i64
br label %loop
loop:
%phi = phi i64 [ 4, %entry ], [ %add, %loop ]
%phi2 = phi i32 [ %mul, %entry ], [ %trunc, %loop ]
%getelementptr = getelementptr i32, ptr %dst, i64 %phi
%and = and i32 %phi2, 12
store i32 %and, ptr %getelementptr, align 4
%mul3 = mul i64 %phi, %zext
%add = add i64 %phi, 1
%icmp = icmp ugt i64 %phi, 65
%trunc = trunc i64 %mul3 to i32
br i1 %icmp, label %exit, label %loop
exit:
ret void
}
define void @wombat2(i32 %arg, ptr %dst) #1 {
entry:
%mul = mul i32 %arg, 3
%zext = zext i32 %arg to i64
br label %loop
loop:
%phi = phi i64 [ 4, %entry ], [ %add, %loop ]
%phi2 = phi i32 [ %mul, %entry ], [ %trunc.1, %loop ]
%getelementptr = getelementptr i32, ptr %dst, i64 %phi
%and = and i32 %phi2, 12
store i32 %and, ptr %getelementptr, align 4
%mul3 = mul i64 %phi, %zext
%add = add i64 %phi, 1
%icmp = icmp ugt i64 %phi, 65
%trunc.0 = trunc i64 %mul3 to i60
%trunc.1 = trunc i60 %trunc.0 to i32
br i1 %icmp, label %exit, label %loop
exit:
ret void
}
define void @with_dead_use(i32 %arg, ptr %dst) #1 {
entry:
%mul = mul i32 %arg, 3
%zext = zext i32 %arg to i64
br label %loop
loop:
%phi = phi i64 [ 4, %entry ], [ %add, %loop ]
%phi2 = phi i32 [ %mul, %entry ], [ %trunc, %loop ]
%getelementptr = getelementptr i32, ptr %dst, i64 %phi
%and = and i32 %phi2, 12
store i32 %and, ptr %getelementptr, align 4
%mul3 = mul i64 %phi, %zext
%add = add i64 %phi, 1
%icmp = icmp ugt i64 %phi, 65
%trunc = trunc i64 %mul3 to i32
%dead.and = and i32 %trunc, 123
br i1 %icmp, label %exit, label %loop
exit:
ret void
}
attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" }
attributes #1 = { "target-cpu"="skylake-avx512" "target-features"="-avx512f" }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
; CHECK: [[META6]] = distinct !{[[META6]], !"LVerDomain"}
; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
; CHECK: [[META8]] = distinct !{[[META8]], [[META6]]}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
; CHECK: [[META13]] = !{[[META14:![0-9]+]]}
; CHECK: [[META14]] = distinct !{[[META14]], [[META15:![0-9]+]]}
; CHECK: [[META15]] = distinct !{[[META15]], !"LVerDomain"}
; CHECK: [[META16]] = !{[[META17:![0-9]+]]}
; CHECK: [[META17]] = distinct !{[[META17]], [[META15]]}
; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]]}
; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META2]], [[META1]]}
; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]}
; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META2]], [[META1]]}
; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]}
; CHECK: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]}
;.