; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 | FileCheck %s
; SLP-vectorizer regression input: eight copies of one scalar pattern.
;
; For k = 0..7 the body computes
;   s[k] = p[addr[2k+1]] + p[addr[2k]]
; Sixteen i32 indices are loaded from %addr, each index selects an i32
; element of %p, and adjacent pairs are summed (add nsw) into eight
; consecutive i32 slots of %s. All three pointer arguments are noalias.
;
; The autogenerated assertions below expect the whole body to collapse into:
;   - two <8 x ptr> address vectors over %addr (odd and even element offsets),
;   - one masked gather of indices per address vector,
;   - one masked gather from %p per index vector,
;   - a single vector add and a single <8 x i32> store to %s.
;
; NOTE(review): every scalar index load from %addr is marked align 8 even
; though consecutive i32 slots are only 4 bytes apart, and the expected
; index gathers carry alignment 8 as well -- confirm this is intentional
; before changing it, since it feeds directly into the expected output.
define void @test(ptr noalias %p, ptr noalias %addr, ptr noalias %s) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP8]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <8 x i32> [[TMP9]], [[TMP6]]
; CHECK-NEXT: store <8 x i32> [[TMP10]], ptr [[S:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
; Lane 0: s[0] = p[addr[1]] + p[addr[0]]
%idx1 = load i32, ptr %addr, align 8
%arrayidx = getelementptr inbounds i32, ptr %p, i32 %idx1
%i = load i32, ptr %arrayidx, align 4
%gep2 = getelementptr inbounds i32, ptr %addr, i32 1
%idx2 = load i32, ptr %gep2, align 8
%arrayidx1 = getelementptr inbounds i32, ptr %p, i32 %idx2
%i1 = load i32, ptr %arrayidx1, align 4
%add = add nsw i32 %i1, %i
store i32 %add, ptr %s, align 4
; Lane 1: s[1] = p[addr[3]] + p[addr[2]]
%gep3 = getelementptr inbounds i32, ptr %addr, i32 2
%idx3 = load i32, ptr %gep3, align 8
%arrayidx4 = getelementptr inbounds i32, ptr %p, i32 %idx3
%i2 = load i32, ptr %arrayidx4, align 4
%gep4 = getelementptr inbounds i32, ptr %addr, i32 3
%idx4 = load i32, ptr %gep4, align 8
%arrayidx6 = getelementptr inbounds i32, ptr %p, i32 %idx4
%i3 = load i32, ptr %arrayidx6, align 4
%add7 = add nsw i32 %i3, %i2
%arrayidx9 = getelementptr inbounds i32, ptr %s, i32 1
store i32 %add7, ptr %arrayidx9, align 4
; Lane 2: s[2] = p[addr[5]] + p[addr[4]]
%gep5 = getelementptr inbounds i32, ptr %addr, i32 4
%idx5 = load i32, ptr %gep5, align 8
%arrayidx11 = getelementptr inbounds i32, ptr %p, i32 %idx5
%i4 = load i32, ptr %arrayidx11, align 4
%gep6 = getelementptr inbounds i32, ptr %addr, i32 5
%idx6 = load i32, ptr %gep6, align 8
%arrayidx13 = getelementptr inbounds i32, ptr %p, i32 %idx6
%i5 = load i32, ptr %arrayidx13, align 4
%add14 = add nsw i32 %i5, %i4
%arrayidx16 = getelementptr inbounds i32, ptr %s, i32 2
store i32 %add14, ptr %arrayidx16, align 4
; Lane 3: s[3] = p[addr[7]] + p[addr[6]]
%gep7 = getelementptr inbounds i32, ptr %addr, i32 6
%idx7 = load i32, ptr %gep7, align 8
%arrayidx18 = getelementptr inbounds i32, ptr %p, i32 %idx7
%i6 = load i32, ptr %arrayidx18, align 4
%gep8 = getelementptr inbounds i32, ptr %addr, i32 7
%idx8 = load i32, ptr %gep8, align 8
%arrayidx20 = getelementptr inbounds i32, ptr %p, i32 %idx8
%i7 = load i32, ptr %arrayidx20, align 4
%add21 = add nsw i32 %i7, %i6
%arrayidx23 = getelementptr inbounds i32, ptr %s, i32 3
store i32 %add21, ptr %arrayidx23, align 4
; Lane 4: s[4] = p[addr[9]] + p[addr[8]]
%gep9 = getelementptr inbounds i32, ptr %addr, i32 8
%idx9 = load i32, ptr %gep9, align 8
%arrayidx25 = getelementptr inbounds i32, ptr %p, i32 %idx9
%i8 = load i32, ptr %arrayidx25, align 4
%gep10 = getelementptr inbounds i32, ptr %addr, i32 9
%idx10 = load i32, ptr %gep10, align 8
%arrayidx27 = getelementptr inbounds i32, ptr %p, i32 %idx10
%i9 = load i32, ptr %arrayidx27, align 4
%add28 = add nsw i32 %i9, %i8
%arrayidx30 = getelementptr inbounds i32, ptr %s, i32 4
store i32 %add28, ptr %arrayidx30, align 4
; Lane 5: s[5] = p[addr[11]] + p[addr[10]]
%gep11 = getelementptr inbounds i32, ptr %addr, i32 10
%idx11 = load i32, ptr %gep11, align 8
%arrayidx32 = getelementptr inbounds i32, ptr %p, i32 %idx11
%i10 = load i32, ptr %arrayidx32, align 4
%gep12 = getelementptr inbounds i32, ptr %addr, i32 11
%idx12 = load i32, ptr %gep12, align 8
%arrayidx34 = getelementptr inbounds i32, ptr %p, i32 %idx12
%i11 = load i32, ptr %arrayidx34, align 4
%add35 = add nsw i32 %i11, %i10
%arrayidx37 = getelementptr inbounds i32, ptr %s, i32 5
store i32 %add35, ptr %arrayidx37, align 4
; Lane 6: s[6] = p[addr[13]] + p[addr[12]]
%gep13 = getelementptr inbounds i32, ptr %addr, i32 12
%idx13 = load i32, ptr %gep13, align 8
%arrayidx39 = getelementptr inbounds i32, ptr %p, i32 %idx13
%i12 = load i32, ptr %arrayidx39, align 4
%gep14 = getelementptr inbounds i32, ptr %addr, i32 13
%idx14 = load i32, ptr %gep14, align 8
%arrayidx41 = getelementptr inbounds i32, ptr %p, i32 %idx14
%i13 = load i32, ptr %arrayidx41, align 4
%add42 = add nsw i32 %i13, %i12
%arrayidx44 = getelementptr inbounds i32, ptr %s, i32 6
store i32 %add42, ptr %arrayidx44, align 4
; Lane 7: s[7] = p[addr[15]] + p[addr[14]]
%gep15 = getelementptr inbounds i32, ptr %addr, i32 14
%idx15 = load i32, ptr %gep15, align 8
%arrayidx46 = getelementptr inbounds i32, ptr %p, i32 %idx15
%i14 = load i32, ptr %arrayidx46, align 4
%gep16 = getelementptr inbounds i32, ptr %addr, i32 15
%idx16 = load i32, ptr %gep16, align 8
%arrayidx48 = getelementptr inbounds i32, ptr %p, i32 %idx16
%i15 = load i32, ptr %arrayidx48, align 4
%add49 = add nsw i32 %i15, %i14
%arrayidx51 = getelementptr inbounds i32, ptr %s, i32 7
store i32 %add49, ptr %arrayidx51, align 4
ret void
}