; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
;void Distance(float *p1, int p2, unsigned long p3[], float p4[]) {
; long a = p3[0] = 5;
; p1 += p2;
; p4[3] += p1[a];
; p3[0] >>= 5;
; p3[1] >>= 5;
; p3[2] >>= 5;
; p3[3] >>= 5;
; p1 += p2;
; p4[0] += p1[p3[0] & a];
;}
; IR for the C snippet quoted in the comment at the top of this file.
; The input performs four separate scalar load / lshr-by-5 / store groups on
; p3[0], p3[1], p3[2] and p3[3]. The expected output replaces them with one
; <4 x i64> load, one vector lshr and one <4 x i64> store. The shifted p3[0]
; value (%shr) is also consumed by the scalar 'and' below, so the expected
; output reads it back with an extractelement of lane 0.
define void @_Z8DistanceIlLi5EEvPfiPmS0_(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
; CHECK-LABEL: @_Z8DistanceIlLi5EEvPfiPmS0_(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 5, ptr [[P3:%.*]], align 8
; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[P1:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 5
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[P4:%.*]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8
; CHECK-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP6]], 5
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR11]], i64 [[AND]]
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[P4]], align 4
; CHECK-NEXT: [[ADD15:%.*]] = fadd float [[TMP7]], [[TMP8]]
; CHECK-NEXT: store float [[ADD15]], ptr [[P4]], align 4
; CHECK-NEXT: ret void
;
entry:
; p3[0] = 5; p1 += p2; p4[3] += p1[5] (the index is the constant 5).
store i64 5, ptr %p3, align 8
%idx.ext = sext i32 %p2 to i64
%add.ptr = getelementptr inbounds float, ptr %p1, i64 %idx.ext
%arrayidx1 = getelementptr inbounds float, ptr %add.ptr, i64 5
%0 = load float, ptr %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds float, ptr %p4, i64 3
%1 = load float, ptr %arrayidx2, align 4
%add = fadd float %0, %1
store float %add, ptr %arrayidx2, align 4
; p3[i] >>= 5 for i = 0..3: four consecutive i64 slots, each updated in place.
%2 = load i64, ptr %p3, align 8
%shr = lshr i64 %2, 5
store i64 %shr, ptr %p3, align 8
%arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 1
%3 = load i64, ptr %arrayidx4, align 8
%shr5 = lshr i64 %3, 5
store i64 %shr5, ptr %arrayidx4, align 8
%arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 2
%4 = load i64, ptr %arrayidx6, align 8
%shr7 = lshr i64 %4, 5
store i64 %shr7, ptr %arrayidx6, align 8
%arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 3
%5 = load i64, ptr %arrayidx8, align 8
%shr9 = lshr i64 %5, 5
store i64 %shr9, ptr %arrayidx8, align 8
; p1 += p2; p4[0] += p1[p3[0] & 5]. %shr is reused here as a scalar, outside
; the four-element store group.
%add.ptr11 = getelementptr inbounds float, ptr %add.ptr, i64 %idx.ext
%and = and i64 %shr, 5
%arrayidx13 = getelementptr inbounds float, ptr %add.ptr11, i64 %and
%6 = load float, ptr %arrayidx13, align 4
%7 = load float, ptr %p4, align 4
%add15 = fadd float %6, %7
store float %add15, ptr %p4, align 4
ret void
}
; For i = 0..3 the input computes p3[i] << p3[8 + i] and stores the result to
; p3[7 - i], so the four stores hit consecutive i64 slots in descending
; address order (7, 6, 5, 4). The expected output uses two <4 x i64> loads
; (from p3 and p3 + 8), one vector shl, a shufflevector with the
; lane-reversing mask <3, 2, 1, 0>, and a single <4 x i64> store at p3 + 4.
define void @store_reverse(ptr %p3) {
; CHECK-LABEL: @store_reverse(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[P3:%.*]], i64 8
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[P3]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], ptr [[ARRAYIDX14]], align 8
; CHECK-NEXT: ret void
;
entry:
; Lane 0: p3[7] = p3[0] << p3[8]
%0 = load i64, ptr %p3, align 8
%arrayidx1 = getelementptr inbounds i64, ptr %p3, i64 8
%1 = load i64, ptr %arrayidx1, align 8
%shl = shl i64 %0, %1
%arrayidx2 = getelementptr inbounds i64, ptr %p3, i64 7
store i64 %shl, ptr %arrayidx2, align 8
; Lane 1: p3[6] = p3[1] << p3[9]
%arrayidx3 = getelementptr inbounds i64, ptr %p3, i64 1
%2 = load i64, ptr %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 9
%3 = load i64, ptr %arrayidx4, align 8
%shl5 = shl i64 %2, %3
%arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 6
store i64 %shl5, ptr %arrayidx6, align 8
; Lane 2: p3[5] = p3[2] << p3[10]
%arrayidx7 = getelementptr inbounds i64, ptr %p3, i64 2
%4 = load i64, ptr %arrayidx7, align 8
%arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 10
%5 = load i64, ptr %arrayidx8, align 8
%shl9 = shl i64 %4, %5
%arrayidx10 = getelementptr inbounds i64, ptr %p3, i64 5
store i64 %shl9, ptr %arrayidx10, align 8
; Lane 3: p3[4] = p3[3] << p3[11]
%arrayidx11 = getelementptr inbounds i64, ptr %p3, i64 3
%6 = load i64, ptr %arrayidx11, align 8
%arrayidx12 = getelementptr inbounds i64, ptr %p3, i64 11
%7 = load i64, ptr %arrayidx12, align 8
%shl13 = shl i64 %6, %7
%arrayidx14 = getelementptr inbounds i64, ptr %p3, i64 4
store i64 %shl13, ptr %arrayidx14, align 8
ret void
}
; The input shifts p3[0..3] right by 5 in place, but the store of the last
; element (%shr9 to p3[3]) is separated from the other three stores by 15
; identical scalar stores of the constant 5 to p3[5]. The expected output
; still combines the four shift results into one <4 x i64> load / lshr /
; store, emitted after the 15 intervening stores, which are kept as-is.
; NOTE(review): presumably this exercises how far the vectorizer looks across
; unrelated stores when collecting a store group; confirm against the pass.
define void @store15(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
; CHECK-LABEL: @store15(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 5, ptr [[P3:%.*]], align 8
; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[P1:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 5
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[P4:%.*]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 5
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8
; CHECK-NEXT: ret void
;
entry:
; p3[0] = 5; p4[3] += (p1 + p2)[5]. This scalar prologue is unchanged in the
; expected output.
store i64 5, ptr %p3, align 8
%idx.ext = sext i32 %p2 to i64
%add.ptr = getelementptr inbounds float, ptr %p1, i64 %idx.ext
%arrayidx1 = getelementptr inbounds float, ptr %add.ptr, i64 5
%0 = load float, ptr %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds float, ptr %p4, i64 3
%1 = load float, ptr %arrayidx2, align 4
%add = fadd float %0, %1
store float %add, ptr %arrayidx2, align 4
; p3[0], p3[1], p3[2] are loaded, shifted and stored back immediately.
%2 = load i64, ptr %p3, align 8
%shr = lshr i64 %2, 5
store i64 %shr, ptr %p3, align 8
%arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 1
%3 = load i64, ptr %arrayidx4, align 8
%shr5 = lshr i64 %3, 5
store i64 %shr5, ptr %arrayidx4, align 8
%arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 2
%4 = load i64, ptr %arrayidx6, align 8
%shr7 = lshr i64 %4, 5
store i64 %shr7, ptr %arrayidx6, align 8
; p3[3] is loaded and shifted here, but its store is deferred to the end.
%arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 3
%5 = load i64, ptr %arrayidx8, align 8
%shr9 = lshr i64 %5, 5
; 15 stores to p3[5], a slot outside the p3[0..3] group.
%arrayidx9 = getelementptr inbounds i64, ptr %p3, i64 5
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
; Final member of the p3[0..3] store group.
store i64 %shr9, ptr %arrayidx8, align 8
ret void
}
; The input shifts p3[0..3] right by 5 in place, but the store of the last
; element (%shr9 to p3[3]) is separated from the other three stores by 16
; identical scalar stores of the constant 5 to p3[5]. The expected output
; still combines the four shift results into one <4 x i64> load / lshr /
; store, emitted after the 16 intervening stores, which are kept as-is.
; NOTE(review): presumably this exercises how far the vectorizer looks across
; unrelated stores when collecting a store group; confirm against the pass.
define void @store16(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
; CHECK-LABEL: @store16(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 5, ptr [[P3:%.*]], align 8
; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[P1:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 5
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[P4:%.*]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 5
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8
; CHECK-NEXT: ret void
;
entry:
; p3[0] = 5; p4[3] += (p1 + p2)[5]. This scalar prologue is unchanged in the
; expected output.
store i64 5, ptr %p3, align 8
%idx.ext = sext i32 %p2 to i64
%add.ptr = getelementptr inbounds float, ptr %p1, i64 %idx.ext
%arrayidx1 = getelementptr inbounds float, ptr %add.ptr, i64 5
%0 = load float, ptr %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds float, ptr %p4, i64 3
%1 = load float, ptr %arrayidx2, align 4
%add = fadd float %0, %1
store float %add, ptr %arrayidx2, align 4
; p3[0], p3[1], p3[2] are loaded, shifted and stored back immediately.
%2 = load i64, ptr %p3, align 8
%shr = lshr i64 %2, 5
store i64 %shr, ptr %p3, align 8
%arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 1
%3 = load i64, ptr %arrayidx4, align 8
%shr5 = lshr i64 %3, 5
store i64 %shr5, ptr %arrayidx4, align 8
%arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 2
%4 = load i64, ptr %arrayidx6, align 8
%shr7 = lshr i64 %4, 5
store i64 %shr7, ptr %arrayidx6, align 8
; p3[3] is loaded and shifted here, but its store is deferred to the end.
%arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 3
%5 = load i64, ptr %arrayidx8, align 8
%shr9 = lshr i64 %5, 5
; 16 stores to p3[5], a slot outside the p3[0..3] group.
%arrayidx9 = getelementptr inbounds i64, ptr %p3, i64 5
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
store i64 5, ptr %arrayidx9, align 8
; Final member of the p3[0..3] store group.
store i64 %shr9, ptr %arrayidx8, align 8
ret void
}