; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
; This test has two mutual reductions over the same data:
; for i = ...
;   sm += x[i];
;   sq += x[i] * x[i];
; In the straight-line form (@straight) the sum-of-squares reduction is SLP
; vectorized, but the plain sum is still built from scalar extracts and adds;
; ideally both reductions would vectorize, as they do in @looped.
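; A C-like sketch of @straight (8 rows of 8 i16 elements, fully unrolled in
; the IR below; identifier names here are illustrative, not from the original
; source, and assume <stdint.h> types):
;   uint64_t straight(const uint16_t *p, int st) {
;     unsigned sm = 0, sq = 0;
;     for (int y = 0; y < 8; y++) {
;       for (int i = 0; i < 8; i++) {
;         unsigned v = p[i];   // zext i16 -> i32
;         sm += v;             // plain sum
;         sq += v * v;         // sum of squares
;       }
;       p += st;               // advance to the next row
;     }
;     return ((uint64_t)sq << 32) | sm;
;   }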
define i64 @straight(ptr nocapture noundef readonly %p, i32 noundef %st) {
; CHECK-LABEL: @straight(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[ST:%.*]] to i64
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR_3:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_2]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR_4:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_3]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR_5:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_4]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR_6:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_5]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[P]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[ADD_PTR]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[ADD_PTR_1]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr [[ADD_PTR_2]], align 2
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[ADD_PTR_3]], align 2
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr [[ADD_PTR_4]], align 2
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[ADD_PTR_5]], align 2
; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr [[ADD_PTR_6]], align 2
; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> poison, <8 x i16> [[TMP0]], i64 0)
; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP8]], <8 x i16> [[TMP1]], i64 8)
; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP9]], <8 x i16> [[TMP2]], i64 16)
; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP10]], <8 x i16> [[TMP3]], i64 24)
; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP11]], <8 x i16> [[TMP4]], i64 32)
; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP12]], <8 x i16> [[TMP5]], i64 40)
; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP13]], <8 x i16> [[TMP6]], i64 48)
; CHECK-NEXT: [[TMP15:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP14]], <8 x i16> [[TMP7]], i64 56)
; CHECK-NEXT: [[TMP16:%.*]] = zext <64 x i16> [[TMP15]] to <64 x i32>
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <64 x i32> [[TMP16]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <64 x i32> [[TMP16]], i32 1
; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[TMP19:%.*]] = mul nuw nsw <64 x i32> [[TMP16]], [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <64 x i32> [[TMP16]], i32 2
; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i32 [[ADD_1]], [[TMP20]]
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <64 x i32> [[TMP16]], i32 3
; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i32 [[ADD_2]], [[TMP21]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <64 x i32> [[TMP16]], i32 4
; CHECK-NEXT: [[ADD_4:%.*]] = add nuw nsw i32 [[ADD_3]], [[TMP22]]
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <64 x i32> [[TMP16]], i32 5
; CHECK-NEXT: [[ADD_5:%.*]] = add nuw nsw i32 [[ADD_4]], [[TMP23]]
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <64 x i32> [[TMP16]], i32 6
; CHECK-NEXT: [[ADD_6:%.*]] = add nuw nsw i32 [[ADD_5]], [[TMP24]]
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <64 x i32> [[TMP16]], i32 7
; CHECK-NEXT: [[ADD_7:%.*]] = add nuw nsw i32 [[ADD_6]], [[TMP25]]
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <64 x i32> [[TMP16]], i32 8
; CHECK-NEXT: [[ADD_141:%.*]] = add nuw nsw i32 [[ADD_7]], [[TMP26]]
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <64 x i32> [[TMP16]], i32 9
; CHECK-NEXT: [[ADD_1_1:%.*]] = add nuw nsw i32 [[ADD_141]], [[TMP27]]
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <64 x i32> [[TMP16]], i32 10
; CHECK-NEXT: [[ADD_2_1:%.*]] = add nuw nsw i32 [[ADD_1_1]], [[TMP28]]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <64 x i32> [[TMP16]], i32 11
; CHECK-NEXT: [[ADD_3_1:%.*]] = add nuw nsw i32 [[ADD_2_1]], [[TMP29]]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <64 x i32> [[TMP16]], i32 12
; CHECK-NEXT: [[ADD_4_1:%.*]] = add nuw nsw i32 [[ADD_3_1]], [[TMP30]]
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <64 x i32> [[TMP16]], i32 13
; CHECK-NEXT: [[ADD_5_1:%.*]] = add nuw nsw i32 [[ADD_4_1]], [[TMP31]]
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <64 x i32> [[TMP16]], i32 14
; CHECK-NEXT: [[ADD_6_1:%.*]] = add nuw nsw i32 [[ADD_5_1]], [[TMP32]]
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <64 x i32> [[TMP16]], i32 15
; CHECK-NEXT: [[ADD_7_1:%.*]] = add nuw nsw i32 [[ADD_6_1]], [[TMP33]]
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <64 x i32> [[TMP16]], i32 16
; CHECK-NEXT: [[ADD_245:%.*]] = add nuw nsw i32 [[ADD_7_1]], [[TMP34]]
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <64 x i32> [[TMP16]], i32 17
; CHECK-NEXT: [[ADD_1_2:%.*]] = add nuw nsw i32 [[ADD_245]], [[TMP35]]
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <64 x i32> [[TMP16]], i32 18
; CHECK-NEXT: [[ADD_2_2:%.*]] = add nuw nsw i32 [[ADD_1_2]], [[TMP36]]
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <64 x i32> [[TMP16]], i32 19
; CHECK-NEXT: [[ADD_3_2:%.*]] = add nuw nsw i32 [[ADD_2_2]], [[TMP37]]
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <64 x i32> [[TMP16]], i32 20
; CHECK-NEXT: [[ADD_4_2:%.*]] = add nuw nsw i32 [[ADD_3_2]], [[TMP38]]
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <64 x i32> [[TMP16]], i32 21
; CHECK-NEXT: [[ADD_5_2:%.*]] = add nuw nsw i32 [[ADD_4_2]], [[TMP39]]
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <64 x i32> [[TMP16]], i32 22
; CHECK-NEXT: [[ADD_6_2:%.*]] = add nuw nsw i32 [[ADD_5_2]], [[TMP40]]
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <64 x i32> [[TMP16]], i32 23
; CHECK-NEXT: [[ADD_7_2:%.*]] = add nuw nsw i32 [[ADD_6_2]], [[TMP41]]
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <64 x i32> [[TMP16]], i32 24
; CHECK-NEXT: [[ADD_349:%.*]] = add nuw nsw i32 [[ADD_7_2]], [[TMP42]]
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <64 x i32> [[TMP16]], i32 25
; CHECK-NEXT: [[ADD_1_3:%.*]] = add nuw nsw i32 [[ADD_349]], [[TMP43]]
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <64 x i32> [[TMP16]], i32 26
; CHECK-NEXT: [[ADD_2_3:%.*]] = add nuw nsw i32 [[ADD_1_3]], [[TMP44]]
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <64 x i32> [[TMP16]], i32 27
; CHECK-NEXT: [[ADD_3_3:%.*]] = add nuw nsw i32 [[ADD_2_3]], [[TMP45]]
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <64 x i32> [[TMP16]], i32 28
; CHECK-NEXT: [[ADD_4_3:%.*]] = add nuw nsw i32 [[ADD_3_3]], [[TMP46]]
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <64 x i32> [[TMP16]], i32 29
; CHECK-NEXT: [[ADD_5_3:%.*]] = add nuw nsw i32 [[ADD_4_3]], [[TMP47]]
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <64 x i32> [[TMP16]], i32 30
; CHECK-NEXT: [[ADD_6_3:%.*]] = add nuw nsw i32 [[ADD_5_3]], [[TMP48]]
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <64 x i32> [[TMP16]], i32 31
; CHECK-NEXT: [[ADD_7_3:%.*]] = add nuw nsw i32 [[ADD_6_3]], [[TMP49]]
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <64 x i32> [[TMP16]], i32 32
; CHECK-NEXT: [[ADD_453:%.*]] = add nuw nsw i32 [[ADD_7_3]], [[TMP50]]
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <64 x i32> [[TMP16]], i32 33
; CHECK-NEXT: [[ADD_1_4:%.*]] = add nuw nsw i32 [[ADD_453]], [[TMP51]]
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <64 x i32> [[TMP16]], i32 34
; CHECK-NEXT: [[ADD_2_4:%.*]] = add nuw nsw i32 [[ADD_1_4]], [[TMP52]]
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <64 x i32> [[TMP16]], i32 35
; CHECK-NEXT: [[ADD_3_4:%.*]] = add nuw nsw i32 [[ADD_2_4]], [[TMP53]]
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <64 x i32> [[TMP16]], i32 36
; CHECK-NEXT: [[ADD_4_4:%.*]] = add nuw nsw i32 [[ADD_3_4]], [[TMP54]]
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <64 x i32> [[TMP16]], i32 37
; CHECK-NEXT: [[ADD_5_4:%.*]] = add nuw nsw i32 [[ADD_4_4]], [[TMP55]]
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <64 x i32> [[TMP16]], i32 38
; CHECK-NEXT: [[ADD_6_4:%.*]] = add nuw nsw i32 [[ADD_5_4]], [[TMP56]]
; CHECK-NEXT: [[TMP57:%.*]] = extractelement <64 x i32> [[TMP16]], i32 39
; CHECK-NEXT: [[ADD_7_4:%.*]] = add nuw nsw i32 [[ADD_6_4]], [[TMP57]]
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <64 x i32> [[TMP16]], i32 40
; CHECK-NEXT: [[ADD_557:%.*]] = add nuw nsw i32 [[ADD_7_4]], [[TMP58]]
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <64 x i32> [[TMP16]], i32 41
; CHECK-NEXT: [[ADD_1_5:%.*]] = add nuw nsw i32 [[ADD_557]], [[TMP59]]
; CHECK-NEXT: [[TMP60:%.*]] = extractelement <64 x i32> [[TMP16]], i32 42
; CHECK-NEXT: [[ADD_2_5:%.*]] = add nuw nsw i32 [[ADD_1_5]], [[TMP60]]
; CHECK-NEXT: [[TMP61:%.*]] = extractelement <64 x i32> [[TMP16]], i32 43
; CHECK-NEXT: [[ADD_3_5:%.*]] = add nuw nsw i32 [[ADD_2_5]], [[TMP61]]
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <64 x i32> [[TMP16]], i32 44
; CHECK-NEXT: [[ADD_4_5:%.*]] = add nuw nsw i32 [[ADD_3_5]], [[TMP62]]
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <64 x i32> [[TMP16]], i32 45
; CHECK-NEXT: [[ADD_5_5:%.*]] = add nuw nsw i32 [[ADD_4_5]], [[TMP63]]
; CHECK-NEXT: [[TMP64:%.*]] = extractelement <64 x i32> [[TMP16]], i32 46
; CHECK-NEXT: [[ADD_6_5:%.*]] = add nuw nsw i32 [[ADD_5_5]], [[TMP64]]
; CHECK-NEXT: [[TMP65:%.*]] = extractelement <64 x i32> [[TMP16]], i32 47
; CHECK-NEXT: [[ADD_7_5:%.*]] = add nuw nsw i32 [[ADD_6_5]], [[TMP65]]
; CHECK-NEXT: [[TMP66:%.*]] = extractelement <64 x i32> [[TMP16]], i32 48
; CHECK-NEXT: [[ADD_661:%.*]] = add nuw nsw i32 [[ADD_7_5]], [[TMP66]]
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <64 x i32> [[TMP16]], i32 49
; CHECK-NEXT: [[ADD_1_6:%.*]] = add nuw nsw i32 [[ADD_661]], [[TMP67]]
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <64 x i32> [[TMP16]], i32 50
; CHECK-NEXT: [[ADD_2_6:%.*]] = add nuw nsw i32 [[ADD_1_6]], [[TMP68]]
; CHECK-NEXT: [[TMP69:%.*]] = extractelement <64 x i32> [[TMP16]], i32 51
; CHECK-NEXT: [[ADD_3_6:%.*]] = add nuw nsw i32 [[ADD_2_6]], [[TMP69]]
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <64 x i32> [[TMP16]], i32 52
; CHECK-NEXT: [[ADD_4_6:%.*]] = add nuw nsw i32 [[ADD_3_6]], [[TMP70]]
; CHECK-NEXT: [[TMP71:%.*]] = extractelement <64 x i32> [[TMP16]], i32 53
; CHECK-NEXT: [[ADD_5_6:%.*]] = add nuw nsw i32 [[ADD_4_6]], [[TMP71]]
; CHECK-NEXT: [[TMP72:%.*]] = extractelement <64 x i32> [[TMP16]], i32 54
; CHECK-NEXT: [[ADD_6_6:%.*]] = add nuw nsw i32 [[ADD_5_6]], [[TMP72]]
; CHECK-NEXT: [[TMP73:%.*]] = extractelement <64 x i32> [[TMP16]], i32 55
; CHECK-NEXT: [[ADD_7_6:%.*]] = add nuw nsw i32 [[ADD_6_6]], [[TMP73]]
; CHECK-NEXT: [[TMP74:%.*]] = extractelement <64 x i32> [[TMP16]], i32 56
; CHECK-NEXT: [[ADD_765:%.*]] = add nuw nsw i32 [[ADD_7_6]], [[TMP74]]
; CHECK-NEXT: [[TMP75:%.*]] = extractelement <64 x i32> [[TMP16]], i32 57
; CHECK-NEXT: [[ADD_1_7:%.*]] = add nuw nsw i32 [[ADD_765]], [[TMP75]]
; CHECK-NEXT: [[TMP76:%.*]] = extractelement <64 x i32> [[TMP16]], i32 58
; CHECK-NEXT: [[ADD_2_7:%.*]] = add nuw nsw i32 [[ADD_1_7]], [[TMP76]]
; CHECK-NEXT: [[TMP77:%.*]] = extractelement <64 x i32> [[TMP16]], i32 59
; CHECK-NEXT: [[ADD_3_7:%.*]] = add nuw nsw i32 [[ADD_2_7]], [[TMP77]]
; CHECK-NEXT: [[TMP78:%.*]] = extractelement <64 x i32> [[TMP16]], i32 60
; CHECK-NEXT: [[ADD_4_7:%.*]] = add nuw nsw i32 [[ADD_3_7]], [[TMP78]]
; CHECK-NEXT: [[TMP79:%.*]] = extractelement <64 x i32> [[TMP16]], i32 61
; CHECK-NEXT: [[ADD_5_7:%.*]] = add nuw nsw i32 [[ADD_4_7]], [[TMP79]]
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <64 x i32> [[TMP16]], i32 62
; CHECK-NEXT: [[ADD_6_7:%.*]] = add nuw nsw i32 [[ADD_5_7]], [[TMP80]]
; CHECK-NEXT: [[TMP81:%.*]] = extractelement <64 x i32> [[TMP16]], i32 63
; CHECK-NEXT: [[ADD_7_7:%.*]] = add nuw nsw i32 [[ADD_6_7]], [[TMP81]]
; CHECK-NEXT: [[TMP82:%.*]] = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> [[TMP19]])
; CHECK-NEXT: [[CONV15:%.*]] = zext i32 [[ADD_7_7]] to i64
; CHECK-NEXT: [[CONV16:%.*]] = zext i32 [[TMP82]] to i64
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i64 [[CONV16]], 32
; CHECK-NEXT: [[ADD17:%.*]] = or i64 [[SHL]], [[CONV15]]
; CHECK-NEXT: ret i64 [[ADD17]]
;
entry:
%idx.ext = sext i32 %st to i64
%0 = load i16, ptr %p, align 2
%conv = zext i16 %0 to i32
%mul = mul nuw nsw i32 %conv, %conv
%arrayidx.1 = getelementptr inbounds i16, ptr %p, i64 1
%1 = load i16, ptr %arrayidx.1, align 2
%conv.1 = zext i16 %1 to i32
%add.1 = add nuw nsw i32 %conv, %conv.1
%mul.1 = mul nuw nsw i32 %conv.1, %conv.1
%add11.1 = add nuw i32 %mul.1, %mul
%arrayidx.2 = getelementptr inbounds i16, ptr %p, i64 2
%2 = load i16, ptr %arrayidx.2, align 2
%conv.2 = zext i16 %2 to i32
%add.2 = add nuw nsw i32 %add.1, %conv.2
%mul.2 = mul nuw nsw i32 %conv.2, %conv.2
%add11.2 = add i32 %mul.2, %add11.1
%arrayidx.3 = getelementptr inbounds i16, ptr %p, i64 3
%3 = load i16, ptr %arrayidx.3, align 2
%conv.3 = zext i16 %3 to i32
%add.3 = add nuw nsw i32 %add.2, %conv.3
%mul.3 = mul nuw nsw i32 %conv.3, %conv.3
%add11.3 = add i32 %mul.3, %add11.2
%arrayidx.4 = getelementptr inbounds i16, ptr %p, i64 4
%4 = load i16, ptr %arrayidx.4, align 2
%conv.4 = zext i16 %4 to i32
%add.4 = add nuw nsw i32 %add.3, %conv.4
%mul.4 = mul nuw nsw i32 %conv.4, %conv.4
%add11.4 = add i32 %mul.4, %add11.3
%arrayidx.5 = getelementptr inbounds i16, ptr %p, i64 5
%5 = load i16, ptr %arrayidx.5, align 2
%conv.5 = zext i16 %5 to i32
%add.5 = add nuw nsw i32 %add.4, %conv.5
%mul.5 = mul nuw nsw i32 %conv.5, %conv.5
%add11.5 = add i32 %mul.5, %add11.4
%arrayidx.6 = getelementptr inbounds i16, ptr %p, i64 6
%6 = load i16, ptr %arrayidx.6, align 2
%conv.6 = zext i16 %6 to i32
%add.6 = add nuw nsw i32 %add.5, %conv.6
%mul.6 = mul nuw nsw i32 %conv.6, %conv.6
%add11.6 = add i32 %mul.6, %add11.5
%arrayidx.7 = getelementptr inbounds i16, ptr %p, i64 7
%7 = load i16, ptr %arrayidx.7, align 2
%conv.7 = zext i16 %7 to i32
%add.7 = add nuw nsw i32 %add.6, %conv.7
%mul.7 = mul nuw nsw i32 %conv.7, %conv.7
%add11.7 = add i32 %mul.7, %add11.6
%add.ptr = getelementptr inbounds i16, ptr %p, i64 %idx.ext
%8 = load i16, ptr %add.ptr, align 2
%conv.140 = zext i16 %8 to i32
%add.141 = add nuw nsw i32 %add.7, %conv.140
%mul.142 = mul nuw nsw i32 %conv.140, %conv.140
%add11.143 = add i32 %mul.142, %add11.7
%arrayidx.1.1 = getelementptr inbounds i16, ptr %add.ptr, i64 1
%9 = load i16, ptr %arrayidx.1.1, align 2
%conv.1.1 = zext i16 %9 to i32
%add.1.1 = add nuw nsw i32 %add.141, %conv.1.1
%mul.1.1 = mul nuw nsw i32 %conv.1.1, %conv.1.1
%add11.1.1 = add i32 %mul.1.1, %add11.143
%arrayidx.2.1 = getelementptr inbounds i16, ptr %add.ptr, i64 2
%10 = load i16, ptr %arrayidx.2.1, align 2
%conv.2.1 = zext i16 %10 to i32
%add.2.1 = add nuw nsw i32 %add.1.1, %conv.2.1
%mul.2.1 = mul nuw nsw i32 %conv.2.1, %conv.2.1
%add11.2.1 = add i32 %mul.2.1, %add11.1.1
%arrayidx.3.1 = getelementptr inbounds i16, ptr %add.ptr, i64 3
%11 = load i16, ptr %arrayidx.3.1, align 2
%conv.3.1 = zext i16 %11 to i32
%add.3.1 = add nuw nsw i32 %add.2.1, %conv.3.1
%mul.3.1 = mul nuw nsw i32 %conv.3.1, %conv.3.1
%add11.3.1 = add i32 %mul.3.1, %add11.2.1
%arrayidx.4.1 = getelementptr inbounds i16, ptr %add.ptr, i64 4
%12 = load i16, ptr %arrayidx.4.1, align 2
%conv.4.1 = zext i16 %12 to i32
%add.4.1 = add nuw nsw i32 %add.3.1, %conv.4.1
%mul.4.1 = mul nuw nsw i32 %conv.4.1, %conv.4.1
%add11.4.1 = add i32 %mul.4.1, %add11.3.1
%arrayidx.5.1 = getelementptr inbounds i16, ptr %add.ptr, i64 5
%13 = load i16, ptr %arrayidx.5.1, align 2
%conv.5.1 = zext i16 %13 to i32
%add.5.1 = add nuw nsw i32 %add.4.1, %conv.5.1
%mul.5.1 = mul nuw nsw i32 %conv.5.1, %conv.5.1
%add11.5.1 = add i32 %mul.5.1, %add11.4.1
%arrayidx.6.1 = getelementptr inbounds i16, ptr %add.ptr, i64 6
%14 = load i16, ptr %arrayidx.6.1, align 2
%conv.6.1 = zext i16 %14 to i32
%add.6.1 = add nuw nsw i32 %add.5.1, %conv.6.1
%mul.6.1 = mul nuw nsw i32 %conv.6.1, %conv.6.1
%add11.6.1 = add i32 %mul.6.1, %add11.5.1
%arrayidx.7.1 = getelementptr inbounds i16, ptr %add.ptr, i64 7
%15 = load i16, ptr %arrayidx.7.1, align 2
%conv.7.1 = zext i16 %15 to i32
%add.7.1 = add nuw nsw i32 %add.6.1, %conv.7.1
%mul.7.1 = mul nuw nsw i32 %conv.7.1, %conv.7.1
%add11.7.1 = add i32 %mul.7.1, %add11.6.1
%add.ptr.1 = getelementptr inbounds i16, ptr %add.ptr, i64 %idx.ext
%16 = load i16, ptr %add.ptr.1, align 2
%conv.244 = zext i16 %16 to i32
%add.245 = add nuw nsw i32 %add.7.1, %conv.244
%mul.246 = mul nuw nsw i32 %conv.244, %conv.244
%add11.247 = add i32 %mul.246, %add11.7.1
%arrayidx.1.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 1
%17 = load i16, ptr %arrayidx.1.2, align 2
%conv.1.2 = zext i16 %17 to i32
%add.1.2 = add nuw nsw i32 %add.245, %conv.1.2
%mul.1.2 = mul nuw nsw i32 %conv.1.2, %conv.1.2
%add11.1.2 = add i32 %mul.1.2, %add11.247
%arrayidx.2.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 2
%18 = load i16, ptr %arrayidx.2.2, align 2
%conv.2.2 = zext i16 %18 to i32
%add.2.2 = add nuw nsw i32 %add.1.2, %conv.2.2
%mul.2.2 = mul nuw nsw i32 %conv.2.2, %conv.2.2
%add11.2.2 = add i32 %mul.2.2, %add11.1.2
%arrayidx.3.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 3
%19 = load i16, ptr %arrayidx.3.2, align 2
%conv.3.2 = zext i16 %19 to i32
%add.3.2 = add nuw nsw i32 %add.2.2, %conv.3.2
%mul.3.2 = mul nuw nsw i32 %conv.3.2, %conv.3.2
%add11.3.2 = add i32 %mul.3.2, %add11.2.2
%arrayidx.4.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 4
%20 = load i16, ptr %arrayidx.4.2, align 2
%conv.4.2 = zext i16 %20 to i32
%add.4.2 = add nuw nsw i32 %add.3.2, %conv.4.2
%mul.4.2 = mul nuw nsw i32 %conv.4.2, %conv.4.2
%add11.4.2 = add i32 %mul.4.2, %add11.3.2
%arrayidx.5.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 5
%21 = load i16, ptr %arrayidx.5.2, align 2
%conv.5.2 = zext i16 %21 to i32
%add.5.2 = add nuw nsw i32 %add.4.2, %conv.5.2
%mul.5.2 = mul nuw nsw i32 %conv.5.2, %conv.5.2
%add11.5.2 = add i32 %mul.5.2, %add11.4.2
%arrayidx.6.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 6
%22 = load i16, ptr %arrayidx.6.2, align 2
%conv.6.2 = zext i16 %22 to i32
%add.6.2 = add nuw nsw i32 %add.5.2, %conv.6.2
%mul.6.2 = mul nuw nsw i32 %conv.6.2, %conv.6.2
%add11.6.2 = add i32 %mul.6.2, %add11.5.2
%arrayidx.7.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 7
%23 = load i16, ptr %arrayidx.7.2, align 2
%conv.7.2 = zext i16 %23 to i32
%add.7.2 = add nuw nsw i32 %add.6.2, %conv.7.2
%mul.7.2 = mul nuw nsw i32 %conv.7.2, %conv.7.2
%add11.7.2 = add i32 %mul.7.2, %add11.6.2
%add.ptr.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 %idx.ext
%24 = load i16, ptr %add.ptr.2, align 2
%conv.348 = zext i16 %24 to i32
%add.349 = add nuw nsw i32 %add.7.2, %conv.348
%mul.350 = mul nuw nsw i32 %conv.348, %conv.348
%add11.351 = add i32 %mul.350, %add11.7.2
%arrayidx.1.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 1
%25 = load i16, ptr %arrayidx.1.3, align 2
%conv.1.3 = zext i16 %25 to i32
%add.1.3 = add nuw nsw i32 %add.349, %conv.1.3
%mul.1.3 = mul nuw nsw i32 %conv.1.3, %conv.1.3
%add11.1.3 = add i32 %mul.1.3, %add11.351
%arrayidx.2.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 2
%26 = load i16, ptr %arrayidx.2.3, align 2
%conv.2.3 = zext i16 %26 to i32
%add.2.3 = add nuw nsw i32 %add.1.3, %conv.2.3
%mul.2.3 = mul nuw nsw i32 %conv.2.3, %conv.2.3
%add11.2.3 = add i32 %mul.2.3, %add11.1.3
%arrayidx.3.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 3
%27 = load i16, ptr %arrayidx.3.3, align 2
%conv.3.3 = zext i16 %27 to i32
%add.3.3 = add nuw nsw i32 %add.2.3, %conv.3.3
%mul.3.3 = mul nuw nsw i32 %conv.3.3, %conv.3.3
%add11.3.3 = add i32 %mul.3.3, %add11.2.3
%arrayidx.4.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 4
%28 = load i16, ptr %arrayidx.4.3, align 2
%conv.4.3 = zext i16 %28 to i32
%add.4.3 = add nuw nsw i32 %add.3.3, %conv.4.3
%mul.4.3 = mul nuw nsw i32 %conv.4.3, %conv.4.3
%add11.4.3 = add i32 %mul.4.3, %add11.3.3
%arrayidx.5.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 5
%29 = load i16, ptr %arrayidx.5.3, align 2
%conv.5.3 = zext i16 %29 to i32
%add.5.3 = add nuw nsw i32 %add.4.3, %conv.5.3
%mul.5.3 = mul nuw nsw i32 %conv.5.3, %conv.5.3
%add11.5.3 = add i32 %mul.5.3, %add11.4.3
%arrayidx.6.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 6
%30 = load i16, ptr %arrayidx.6.3, align 2
%conv.6.3 = zext i16 %30 to i32
%add.6.3 = add nuw nsw i32 %add.5.3, %conv.6.3
%mul.6.3 = mul nuw nsw i32 %conv.6.3, %conv.6.3
%add11.6.3 = add i32 %mul.6.3, %add11.5.3
%arrayidx.7.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 7
%31 = load i16, ptr %arrayidx.7.3, align 2
%conv.7.3 = zext i16 %31 to i32
%add.7.3 = add nuw nsw i32 %add.6.3, %conv.7.3
%mul.7.3 = mul nuw nsw i32 %conv.7.3, %conv.7.3
%add11.7.3 = add i32 %mul.7.3, %add11.6.3
%add.ptr.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 %idx.ext
%32 = load i16, ptr %add.ptr.3, align 2
%conv.452 = zext i16 %32 to i32
%add.453 = add nuw nsw i32 %add.7.3, %conv.452
%mul.454 = mul nuw nsw i32 %conv.452, %conv.452
%add11.455 = add i32 %mul.454, %add11.7.3
%arrayidx.1.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 1
%33 = load i16, ptr %arrayidx.1.4, align 2
%conv.1.4 = zext i16 %33 to i32
%add.1.4 = add nuw nsw i32 %add.453, %conv.1.4
%mul.1.4 = mul nuw nsw i32 %conv.1.4, %conv.1.4
%add11.1.4 = add i32 %mul.1.4, %add11.455
%arrayidx.2.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 2
%34 = load i16, ptr %arrayidx.2.4, align 2
%conv.2.4 = zext i16 %34 to i32
%add.2.4 = add nuw nsw i32 %add.1.4, %conv.2.4
%mul.2.4 = mul nuw nsw i32 %conv.2.4, %conv.2.4
%add11.2.4 = add i32 %mul.2.4, %add11.1.4
%arrayidx.3.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 3
%35 = load i16, ptr %arrayidx.3.4, align 2
%conv.3.4 = zext i16 %35 to i32
%add.3.4 = add nuw nsw i32 %add.2.4, %conv.3.4
%mul.3.4 = mul nuw nsw i32 %conv.3.4, %conv.3.4
%add11.3.4 = add i32 %mul.3.4, %add11.2.4
%arrayidx.4.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 4
%36 = load i16, ptr %arrayidx.4.4, align 2
%conv.4.4 = zext i16 %36 to i32
%add.4.4 = add nuw nsw i32 %add.3.4, %conv.4.4
%mul.4.4 = mul nuw nsw i32 %conv.4.4, %conv.4.4
%add11.4.4 = add i32 %mul.4.4, %add11.3.4
%arrayidx.5.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 5
%37 = load i16, ptr %arrayidx.5.4, align 2
%conv.5.4 = zext i16 %37 to i32
%add.5.4 = add nuw nsw i32 %add.4.4, %conv.5.4
%mul.5.4 = mul nuw nsw i32 %conv.5.4, %conv.5.4
%add11.5.4 = add i32 %mul.5.4, %add11.4.4
%arrayidx.6.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 6
%38 = load i16, ptr %arrayidx.6.4, align 2
%conv.6.4 = zext i16 %38 to i32
%add.6.4 = add nuw nsw i32 %add.5.4, %conv.6.4
%mul.6.4 = mul nuw nsw i32 %conv.6.4, %conv.6.4
%add11.6.4 = add i32 %mul.6.4, %add11.5.4
%arrayidx.7.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 7
%39 = load i16, ptr %arrayidx.7.4, align 2
%conv.7.4 = zext i16 %39 to i32
%add.7.4 = add nuw nsw i32 %add.6.4, %conv.7.4
%mul.7.4 = mul nuw nsw i32 %conv.7.4, %conv.7.4
%add11.7.4 = add i32 %mul.7.4, %add11.6.4
%add.ptr.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 %idx.ext
%40 = load i16, ptr %add.ptr.4, align 2
%conv.556 = zext i16 %40 to i32
%add.557 = add nuw nsw i32 %add.7.4, %conv.556
%mul.558 = mul nuw nsw i32 %conv.556, %conv.556
%add11.559 = add i32 %mul.558, %add11.7.4
%arrayidx.1.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 1
%41 = load i16, ptr %arrayidx.1.5, align 2
%conv.1.5 = zext i16 %41 to i32
%add.1.5 = add nuw nsw i32 %add.557, %conv.1.5
%mul.1.5 = mul nuw nsw i32 %conv.1.5, %conv.1.5
%add11.1.5 = add i32 %mul.1.5, %add11.559
%arrayidx.2.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 2
%42 = load i16, ptr %arrayidx.2.5, align 2
%conv.2.5 = zext i16 %42 to i32
%add.2.5 = add nuw nsw i32 %add.1.5, %conv.2.5
%mul.2.5 = mul nuw nsw i32 %conv.2.5, %conv.2.5
%add11.2.5 = add i32 %mul.2.5, %add11.1.5
%arrayidx.3.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 3
%43 = load i16, ptr %arrayidx.3.5, align 2
%conv.3.5 = zext i16 %43 to i32
%add.3.5 = add nuw nsw i32 %add.2.5, %conv.3.5
%mul.3.5 = mul nuw nsw i32 %conv.3.5, %conv.3.5
%add11.3.5 = add i32 %mul.3.5, %add11.2.5
%arrayidx.4.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 4
%44 = load i16, ptr %arrayidx.4.5, align 2
%conv.4.5 = zext i16 %44 to i32
%add.4.5 = add nuw nsw i32 %add.3.5, %conv.4.5
%mul.4.5 = mul nuw nsw i32 %conv.4.5, %conv.4.5
%add11.4.5 = add i32 %mul.4.5, %add11.3.5
%arrayidx.5.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 5
%45 = load i16, ptr %arrayidx.5.5, align 2
%conv.5.5 = zext i16 %45 to i32
%add.5.5 = add nuw nsw i32 %add.4.5, %conv.5.5
%mul.5.5 = mul nuw nsw i32 %conv.5.5, %conv.5.5
%add11.5.5 = add i32 %mul.5.5, %add11.4.5
%arrayidx.6.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 6
%46 = load i16, ptr %arrayidx.6.5, align 2
%conv.6.5 = zext i16 %46 to i32
%add.6.5 = add nuw nsw i32 %add.5.5, %conv.6.5
%mul.6.5 = mul nuw nsw i32 %conv.6.5, %conv.6.5
%add11.6.5 = add i32 %mul.6.5, %add11.5.5
%arrayidx.7.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 7
%47 = load i16, ptr %arrayidx.7.5, align 2
%conv.7.5 = zext i16 %47 to i32
%add.7.5 = add nuw nsw i32 %add.6.5, %conv.7.5
%mul.7.5 = mul nuw nsw i32 %conv.7.5, %conv.7.5
%add11.7.5 = add i32 %mul.7.5, %add11.6.5
%add.ptr.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 %idx.ext
%48 = load i16, ptr %add.ptr.5, align 2
%conv.660 = zext i16 %48 to i32
%add.661 = add nuw nsw i32 %add.7.5, %conv.660
%mul.662 = mul nuw nsw i32 %conv.660, %conv.660
%add11.663 = add i32 %mul.662, %add11.7.5
%arrayidx.1.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 1
%49 = load i16, ptr %arrayidx.1.6, align 2
%conv.1.6 = zext i16 %49 to i32
%add.1.6 = add nuw nsw i32 %add.661, %conv.1.6
%mul.1.6 = mul nuw nsw i32 %conv.1.6, %conv.1.6
%add11.1.6 = add i32 %mul.1.6, %add11.663
%arrayidx.2.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 2
%50 = load i16, ptr %arrayidx.2.6, align 2
%conv.2.6 = zext i16 %50 to i32
%add.2.6 = add nuw nsw i32 %add.1.6, %conv.2.6
%mul.2.6 = mul nuw nsw i32 %conv.2.6, %conv.2.6
%add11.2.6 = add i32 %mul.2.6, %add11.1.6
%arrayidx.3.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 3
%51 = load i16, ptr %arrayidx.3.6, align 2
%conv.3.6 = zext i16 %51 to i32
%add.3.6 = add nuw nsw i32 %add.2.6, %conv.3.6
%mul.3.6 = mul nuw nsw i32 %conv.3.6, %conv.3.6
%add11.3.6 = add i32 %mul.3.6, %add11.2.6
%arrayidx.4.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 4
%52 = load i16, ptr %arrayidx.4.6, align 2
%conv.4.6 = zext i16 %52 to i32
%add.4.6 = add nuw nsw i32 %add.3.6, %conv.4.6
%mul.4.6 = mul nuw nsw i32 %conv.4.6, %conv.4.6
%add11.4.6 = add i32 %mul.4.6, %add11.3.6
%arrayidx.5.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 5
%53 = load i16, ptr %arrayidx.5.6, align 2
%conv.5.6 = zext i16 %53 to i32
%add.5.6 = add nuw nsw i32 %add.4.6, %conv.5.6
%mul.5.6 = mul nuw nsw i32 %conv.5.6, %conv.5.6
%add11.5.6 = add i32 %mul.5.6, %add11.4.6
%arrayidx.6.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 6
%54 = load i16, ptr %arrayidx.6.6, align 2
%conv.6.6 = zext i16 %54 to i32
%add.6.6 = add nuw nsw i32 %add.5.6, %conv.6.6
%mul.6.6 = mul nuw nsw i32 %conv.6.6, %conv.6.6
%add11.6.6 = add i32 %mul.6.6, %add11.5.6
%arrayidx.7.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 7
%55 = load i16, ptr %arrayidx.7.6, align 2
%conv.7.6 = zext i16 %55 to i32
%add.7.6 = add nuw nsw i32 %add.6.6, %conv.7.6
%mul.7.6 = mul nuw nsw i32 %conv.7.6, %conv.7.6
%add11.7.6 = add i32 %mul.7.6, %add11.6.6
%add.ptr.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 %idx.ext
%56 = load i16, ptr %add.ptr.6, align 2
%conv.764 = zext i16 %56 to i32
%add.765 = add nuw nsw i32 %add.7.6, %conv.764
%mul.766 = mul nuw nsw i32 %conv.764, %conv.764
%add11.767 = add i32 %mul.766, %add11.7.6
%arrayidx.1.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 1
%57 = load i16, ptr %arrayidx.1.7, align 2
%conv.1.7 = zext i16 %57 to i32
%add.1.7 = add nuw nsw i32 %add.765, %conv.1.7
%mul.1.7 = mul nuw nsw i32 %conv.1.7, %conv.1.7
%add11.1.7 = add i32 %mul.1.7, %add11.767
%arrayidx.2.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 2
%58 = load i16, ptr %arrayidx.2.7, align 2
%conv.2.7 = zext i16 %58 to i32
%add.2.7 = add nuw nsw i32 %add.1.7, %conv.2.7
%mul.2.7 = mul nuw nsw i32 %conv.2.7, %conv.2.7
%add11.2.7 = add i32 %mul.2.7, %add11.1.7
%arrayidx.3.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 3
%59 = load i16, ptr %arrayidx.3.7, align 2
%conv.3.7 = zext i16 %59 to i32
%add.3.7 = add nuw nsw i32 %add.2.7, %conv.3.7
%mul.3.7 = mul nuw nsw i32 %conv.3.7, %conv.3.7
%add11.3.7 = add i32 %mul.3.7, %add11.2.7
%arrayidx.4.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 4
%60 = load i16, ptr %arrayidx.4.7, align 2
%conv.4.7 = zext i16 %60 to i32
%add.4.7 = add nuw nsw i32 %add.3.7, %conv.4.7
%mul.4.7 = mul nuw nsw i32 %conv.4.7, %conv.4.7
%add11.4.7 = add i32 %mul.4.7, %add11.3.7
%arrayidx.5.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 5
%61 = load i16, ptr %arrayidx.5.7, align 2
%conv.5.7 = zext i16 %61 to i32
%add.5.7 = add nuw nsw i32 %add.4.7, %conv.5.7
%mul.5.7 = mul nuw nsw i32 %conv.5.7, %conv.5.7
%add11.5.7 = add i32 %mul.5.7, %add11.4.7
%arrayidx.6.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 6
%62 = load i16, ptr %arrayidx.6.7, align 2
%conv.6.7 = zext i16 %62 to i32
%add.6.7 = add nuw nsw i32 %add.5.7, %conv.6.7
%mul.6.7 = mul nuw nsw i32 %conv.6.7, %conv.6.7
%add11.6.7 = add i32 %mul.6.7, %add11.5.7
%arrayidx.7.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 7
%63 = load i16, ptr %arrayidx.7.7, align 2
%conv.7.7 = zext i16 %63 to i32
%add.7.7 = add nuw nsw i32 %add.6.7, %conv.7.7
%mul.7.7 = mul nuw nsw i32 %conv.7.7, %conv.7.7
%add11.7.7 = add i32 %mul.7.7, %add11.6.7
%conv15 = zext i32 %add.7.7 to i64
%conv16 = zext i32 %add11.7.7 to i64
%shl = shl nuw i64 %conv16, 32
%add17 = or i64 %shl, %conv15
ret i64 %add17
}
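
; @looped computes the same pair of reductions, but keeps the loop explicit:
; 16 rows of 16 i16 elements per iteration. A C-like sketch (illustrative
; names, assuming <stdint.h> types):
;   uint64_t looped(const uint16_t *p, int st) {
;     unsigned sm = 0, sq = 0;
;     for (int y = 0; y < 16; y++) {
;       for (int i = 0; i < 16; i++) {
;         unsigned v = p[i];
;         sm += v;
;         sq += v * v;
;       }
;       p += st;
;     }
;     return ((uint64_t)sq << 32) | sm;
;   }
; In this form both reductions vectorize: each row is loaded as <16 x i16>
; and fed to two @llvm.vector.reduce.add.v16i32 calls.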
define i64 @looped(ptr nocapture noundef readonly %p, i32 noundef %st) {
; CHECK-LABEL: @looped(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[ST:%.*]] to i64
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; CHECK: for.cond1.preheader:
; CHECK-NEXT: [[Y_038:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC13:%.*]], [[FOR_COND1_PREHEADER]] ]
; CHECK-NEXT: [[SQ_037:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_RDX:%.*]], [[FOR_COND1_PREHEADER]] ]
; CHECK-NEXT: [[SM_036:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_RDX1:%.*]], [[FOR_COND1_PREHEADER]] ]
; CHECK-NEXT: [[P_ADDR_035:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_COND1_PREHEADER]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i16>, ptr [[P_ADDR_035]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = zext <16 x i16> [[TMP0]] to <16 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <16 x i32> [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: [[OP_RDX1]] = add i32 [[TMP3]], [[SM_036]]
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP2]])
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP4]], [[SQ_037]]
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[INC13]] = add nuw nsw i32 [[Y_038]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC13]], 16
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[CONV15:%.*]] = zext i32 [[OP_RDX1]] to i64
; CHECK-NEXT: [[CONV16:%.*]] = zext i32 [[OP_RDX]] to i64
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i64 [[CONV16]], 32
; CHECK-NEXT: [[ADD17:%.*]] = or i64 [[SHL]], [[CONV15]]
; CHECK-NEXT: ret i64 [[ADD17]]
;
entry:
%idx.ext = sext i32 %st to i64
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %entry, %for.cond1.preheader
%y.038 = phi i32 [ 0, %entry ], [ %inc13, %for.cond1.preheader ]
%sq.037 = phi i32 [ 0, %entry ], [ %add11.15, %for.cond1.preheader ]
%sm.036 = phi i32 [ 0, %entry ], [ %add.15, %for.cond1.preheader ]
%p.addr.035 = phi ptr [ %p, %entry ], [ %add.ptr, %for.cond1.preheader ]
%0 = load i16, ptr %p.addr.035, align 2
%conv = zext i16 %0 to i32
%add = add i32 %sm.036, %conv
%mul = mul nuw nsw i32 %conv, %conv
%add11 = add i32 %mul, %sq.037
%arrayidx.1 = getelementptr inbounds i16, ptr %p.addr.035, i64 1
%1 = load i16, ptr %arrayidx.1, align 2
%conv.1 = zext i16 %1 to i32
%add.1 = add i32 %add, %conv.1
%mul.1 = mul nuw nsw i32 %conv.1, %conv.1
%add11.1 = add i32 %mul.1, %add11
%arrayidx.2 = getelementptr inbounds i16, ptr %p.addr.035, i64 2
%2 = load i16, ptr %arrayidx.2, align 2
%conv.2 = zext i16 %2 to i32
%add.2 = add i32 %add.1, %conv.2
%mul.2 = mul nuw nsw i32 %conv.2, %conv.2
%add11.2 = add i32 %mul.2, %add11.1
%arrayidx.3 = getelementptr inbounds i16, ptr %p.addr.035, i64 3
%3 = load i16, ptr %arrayidx.3, align 2
%conv.3 = zext i16 %3 to i32
%add.3 = add i32 %add.2, %conv.3
%mul.3 = mul nuw nsw i32 %conv.3, %conv.3
%add11.3 = add i32 %mul.3, %add11.2
%arrayidx.4 = getelementptr inbounds i16, ptr %p.addr.035, i64 4
%4 = load i16, ptr %arrayidx.4, align 2
%conv.4 = zext i16 %4 to i32
%add.4 = add i32 %add.3, %conv.4
%mul.4 = mul nuw nsw i32 %conv.4, %conv.4
%add11.4 = add i32 %mul.4, %add11.3
%arrayidx.5 = getelementptr inbounds i16, ptr %p.addr.035, i64 5
%5 = load i16, ptr %arrayidx.5, align 2
%conv.5 = zext i16 %5 to i32
%add.5 = add i32 %add.4, %conv.5
%mul.5 = mul nuw nsw i32 %conv.5, %conv.5
%add11.5 = add i32 %mul.5, %add11.4
%arrayidx.6 = getelementptr inbounds i16, ptr %p.addr.035, i64 6
%6 = load i16, ptr %arrayidx.6, align 2
%conv.6 = zext i16 %6 to i32
%add.6 = add i32 %add.5, %conv.6
%mul.6 = mul nuw nsw i32 %conv.6, %conv.6
%add11.6 = add i32 %mul.6, %add11.5
%arrayidx.7 = getelementptr inbounds i16, ptr %p.addr.035, i64 7
%7 = load i16, ptr %arrayidx.7, align 2
%conv.7 = zext i16 %7 to i32
%add.7 = add i32 %add.6, %conv.7
%mul.7 = mul nuw nsw i32 %conv.7, %conv.7
%add11.7 = add i32 %mul.7, %add11.6
%arrayidx.8 = getelementptr inbounds i16, ptr %p.addr.035, i64 8
%8 = load i16, ptr %arrayidx.8, align 2
%conv.8 = zext i16 %8 to i32
%add.8 = add i32 %add.7, %conv.8
%mul.8 = mul nuw nsw i32 %conv.8, %conv.8
%add11.8 = add i32 %mul.8, %add11.7
%arrayidx.9 = getelementptr inbounds i16, ptr %p.addr.035, i64 9
%9 = load i16, ptr %arrayidx.9, align 2
%conv.9 = zext i16 %9 to i32
%add.9 = add i32 %add.8, %conv.9
%mul.9 = mul nuw nsw i32 %conv.9, %conv.9
%add11.9 = add i32 %mul.9, %add11.8
%arrayidx.10 = getelementptr inbounds i16, ptr %p.addr.035, i64 10
%10 = load i16, ptr %arrayidx.10, align 2
%conv.10 = zext i16 %10 to i32
%add.10 = add i32 %add.9, %conv.10
%mul.10 = mul nuw nsw i32 %conv.10, %conv.10
%add11.10 = add i32 %mul.10, %add11.9
%arrayidx.11 = getelementptr inbounds i16, ptr %p.addr.035, i64 11
%11 = load i16, ptr %arrayidx.11, align 2
%conv.11 = zext i16 %11 to i32
%add.11 = add i32 %add.10, %conv.11
%mul.11 = mul nuw nsw i32 %conv.11, %conv.11
%add11.11 = add i32 %mul.11, %add11.10
%arrayidx.12 = getelementptr inbounds i16, ptr %p.addr.035, i64 12
%12 = load i16, ptr %arrayidx.12, align 2
%conv.12 = zext i16 %12 to i32
%add.12 = add i32 %add.11, %conv.12
%mul.12 = mul nuw nsw i32 %conv.12, %conv.12
%add11.12 = add i32 %mul.12, %add11.11
%arrayidx.13 = getelementptr inbounds i16, ptr %p.addr.035, i64 13
%13 = load i16, ptr %arrayidx.13, align 2
%conv.13 = zext i16 %13 to i32
%add.13 = add i32 %add.12, %conv.13
%mul.13 = mul nuw nsw i32 %conv.13, %conv.13
%add11.13 = add i32 %mul.13, %add11.12
%arrayidx.14 = getelementptr inbounds i16, ptr %p.addr.035, i64 14
%14 = load i16, ptr %arrayidx.14, align 2
%conv.14 = zext i16 %14 to i32
%add.14 = add i32 %add.13, %conv.14
%mul.14 = mul nuw nsw i32 %conv.14, %conv.14
%add11.14 = add i32 %mul.14, %add11.13
%arrayidx.15 = getelementptr inbounds i16, ptr %p.addr.035, i64 15
%15 = load i16, ptr %arrayidx.15, align 2
%conv.15 = zext i16 %15 to i32
%add.15 = add i32 %add.14, %conv.15
%mul.15 = mul nuw nsw i32 %conv.15, %conv.15
%add11.15 = add i32 %mul.15, %add11.14
%add.ptr = getelementptr inbounds i16, ptr %p.addr.035, i64 %idx.ext
%inc13 = add nuw nsw i32 %y.038, 1
%exitcond.not = icmp eq i32 %inc13, 16
br i1 %exitcond.not, label %for.cond.cleanup, label %for.cond1.preheader
for.cond.cleanup: ; preds = %for.cond1.preheader
%conv15 = zext i32 %add.15 to i64
%conv16 = zext i32 %add11.15 to i64
%shl = shl nuw i64 %conv16, 32
%add17 = or i64 %shl, %conv15
ret i64 %add17
}