; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -slp-threshold=-10 | FileCheck %s --check-prefix=THRESHOLD
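
; The functions below build scalar fadd/fmul (and integer add) reduction
; chains that the SLP vectorizer is expected to rewrite into
; @llvm.vector.reduce.* calls; the THRESHOLD run adds -slp-threshold=-10 so
; that less profitable trees are vectorized as well.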
@n = external local_unnamed_addr global i32, align 4
@arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@res = external local_unnamed_addr global float, align 4
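
; @baz: every arr[i]*arr1[i] product (i = 0..3) and %conv are accumulated
; twice, so the checks expect a v4f32 fmul + reduce.fadd whose result and
; %conv are each scaled by 2.0.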
define float @baz() {
; CHECK-LABEL: @baz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 2.000000e+00
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
; CHECK-NEXT: store float [[OP_RDX]], ptr @res, align 4
; CHECK-NEXT: ret float [[OP_RDX]]
;
; THRESHOLD-LABEL: @baz(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0
; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[CONV]], i32 1
; THRESHOLD-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], <float 2.000000e+00, float 2.000000e+00>
; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
; THRESHOLD-NEXT: store float [[OP_RDX]], ptr @res, align 4
; THRESHOLD-NEXT: ret float [[OP_RDX]]
;
entry:
%0 = load i32, ptr @n, align 4
%mul = mul nsw i32 %0, 3
%conv = sitofp i32 %mul to float
%1 = load float, ptr @arr, align 16
%2 = load float, ptr @arr1, align 16
%mul4 = fmul fast float %2, %1
%add = fadd fast float %mul4, %conv
%3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
%4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
%mul4.1 = fmul fast float %4, %3
%add.1 = fadd fast float %mul4.1, %add
%5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
%6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
%mul4.2 = fmul fast float %6, %5
%add.2 = fadd fast float %mul4.2, %add.1
%7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
%8 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
%mul4.3 = fmul fast float %8, %7
%add.3 = fadd fast float %mul4.3, %add.2
%add7 = fadd fast float %add.3, %conv
%add19 = fadd fast float %mul4, %add7
%add19.1 = fadd fast float %mul4.1, %add19
%add19.2 = fadd fast float %mul4.2, %add19.1
%add19.3 = fadd fast float %mul4.3, %add19.2
store float %add19.3, ptr @res, align 4
ret float %add19.3
}
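
; @bazz: an 8-wide arr*arr1 dot product with two extra scalar terms %conv and
; %conv6; the checks expect a v8f32 fmul + reduce.fadd followed by two scalar
; fadds.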
define float @bazz() {
; CHECK-LABEL: @bazz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr @arr, align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, ptr @arr1, align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]]
; CHECK-NEXT: store float [[OP_RDX1]], ptr @res, align 4
; CHECK-NEXT: ret float [[OP_RDX1]]
;
; THRESHOLD-LABEL: @bazz(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr @arr, align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]]
; THRESHOLD-NEXT: store float [[OP_RDX1]], ptr @res, align 4
; THRESHOLD-NEXT: ret float [[OP_RDX1]]
;
entry:
%0 = load i32, ptr @n, align 4
%mul = mul nsw i32 %0, 3
%conv = sitofp i32 %mul to float
%1 = load float, ptr @arr, align 16
%2 = load float, ptr @arr1, align 16
%mul4 = fmul fast float %2, %1
%add = fadd fast float %mul4, %conv
%3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
%4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
%mul4.1 = fmul fast float %4, %3
%add.1 = fadd fast float %mul4.1, %add
%5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
%6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
%mul4.2 = fmul fast float %6, %5
%add.2 = fadd fast float %mul4.2, %add.1
%7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
%8 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
%mul4.3 = fmul fast float %8, %7
%add.3 = fadd fast float %mul4.3, %add.2
%mul5 = shl nsw i32 %0, 2
%conv6 = sitofp i32 %mul5 to float
%add7 = fadd fast float %add.3, %conv6
%9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 4), align 16
%10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 4), align 16
%mul18 = fmul fast float %10, %9
%add19 = fadd fast float %mul18, %add7
%11 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 5), align 4
%12 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 5), align 4
%mul18.1 = fmul fast float %12, %11
%add19.1 = fadd fast float %mul18.1, %add19
%13 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 6), align 8
%14 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 6), align 8
%mul18.2 = fmul fast float %14, %13
%add19.2 = fadd fast float %mul18.2, %add19.1
%15 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 7), align 4
%16 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 7), align 4
%mul18.3 = fmul fast float %16, %15
%add19.3 = fadd fast float %mul18.3, %add19.2
store float %add19.3, ptr @res, align 4
ret float %add19.3
}
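
; @bazzz: the sum of four arr*arr1 products is multiplied by %conv; the checks
; expect a v4f32 reduce.fadd feeding a scalar fmul.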
define float @bazzz() {
; CHECK-LABEL: @bazzz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; CHECK-NEXT: store float [[TMP5]], ptr @res, align 4
; CHECK-NEXT: ret float [[TMP5]]
;
; THRESHOLD-LABEL: @bazzz(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; THRESHOLD-NEXT: store float [[TMP5]], ptr @res, align 4
; THRESHOLD-NEXT: ret float [[TMP5]]
;
entry:
%0 = load i32, ptr @n, align 4
%conv = sitofp i32 %0 to float
%1 = load float, ptr @arr, align 16
%2 = load float, ptr @arr1, align 16
%mul = fmul fast float %2, %1
%3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
%4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
%mul.1 = fmul fast float %4, %3
%5 = fadd fast float %mul.1, %mul
%6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
%7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
%mul.2 = fmul fast float %7, %6
%8 = fadd fast float %mul.2, %5
%9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
%10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
%mul.3 = fmul fast float %10, %9
%11 = fadd fast float %mul.3, %8
%12 = fmul fast float %conv, %11
store float %12, ptr @res, align 4
ret float %12
}
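
; @foo: the same reduction as @bazzz, but the result is converted back to i32
; and stored to @n.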
define i32 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
; CHECK-NEXT: store i32 [[CONV4]], ptr @n, align 4
; CHECK-NEXT: ret i32 [[CONV4]]
;
; THRESHOLD-LABEL: @foo(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, ptr @n, align 4
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
; THRESHOLD-NEXT: store i32 [[CONV4]], ptr @n, align 4
; THRESHOLD-NEXT: ret i32 [[CONV4]]
;
entry:
%0 = load i32, ptr @n, align 4
%conv = sitofp i32 %0 to float
%1 = load float, ptr @arr, align 16
%2 = load float, ptr @arr1, align 16
%mul = fmul fast float %2, %1
%3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
%4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
%mul.1 = fmul fast float %4, %3
%5 = fadd fast float %mul.1, %mul
%6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
%7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
%mul.2 = fmul fast float %7, %6
%8 = fadd fast float %mul.2, %5
%9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
%10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
%mul.3 = fmul fast float %10, %9
%11 = fadd fast float %mul.3, %8
%12 = fmul fast float %conv, %11
%conv4 = fptosi float %12 to i32
store i32 %conv4, ptr @n, align 4
ret i32 %conv4
}
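
; @bar: a max reduction of four arr*arr1 products built from fcmp ogt + select;
; the checks only expect the first two products to be vectorized into a
; <2 x float> fmul while the compare/select chain stays scalar.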
; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
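; A rough sketch (hypothetical, with placeholder value names) of the
; fmaxnum-style form the FIXME is asking for, once the products are
; vectorized -- not what the checks below currently expect:
;   %m = fmul fast <4 x float> %v1, %v0
;   %max = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %m)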
define float @bar() {
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr @arr, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr @arr1, align 16
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
; CHECK-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
; CHECK-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
; CHECK-NEXT: store float [[MAX_0_MUL3_2]], ptr @res, align 4
; CHECK-NEXT: ret float [[MAX_0_MUL3_2]]
;
; THRESHOLD-LABEL: @bar(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr @arr, align 16
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr @arr1, align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
; THRESHOLD-NEXT: [[TMP5:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
; THRESHOLD-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
; THRESHOLD-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
; THRESHOLD-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
; THRESHOLD-NEXT: [[TMP8:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
; THRESHOLD-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
; THRESHOLD-NEXT: store float [[MAX_0_MUL3_2]], ptr @res, align 4
; THRESHOLD-NEXT: ret float [[MAX_0_MUL3_2]]
;
entry:
%0 = load float, ptr @arr, align 16
%1 = load float, ptr @arr1, align 16
%mul = fmul fast float %1, %0
%2 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
%3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
%mul3 = fmul fast float %3, %2
%cmp4 = fcmp fast ogt float %mul, %mul3
%max.0.mul3 = select i1 %cmp4, float %mul, float %mul3
%4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
%5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
%mul3.1 = fmul fast float %5, %4
%cmp4.1 = fcmp fast ogt float %max.0.mul3, %mul3.1
%max.0.mul3.1 = select i1 %cmp4.1, float %max.0.mul3, float %mul3.1
%6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
%7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
%mul3.2 = fmul fast float %7, %6
%cmp4.2 = fcmp fast ogt float %max.0.mul3.1, %mul3.2
%max.0.mul3.2 = select i1 %cmp4.2, float %max.0.mul3.1, float %mul3.2
store float %max.0.mul3.2, ptr @res, align 4
ret float %max.0.mul3.2
}
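
; @f: a 48-element fadd chain over consecutive loads from %x; the checks
; expect it to be split into v32f32 and v16f32 reduce.fadd calls joined by a
; single fadd.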
define float @f(ptr nocapture readonly %x) {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret float [[OP_RDX]]
;
; THRESHOLD-LABEL: @f(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
; THRESHOLD-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
; THRESHOLD-NEXT: ret float [[OP_RDX]]
;
entry:
%0 = load float, ptr %x, align 4
%arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
%1 = load float, ptr %arrayidx.1, align 4
%add.1 = fadd fast float %1, %0
%arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
%2 = load float, ptr %arrayidx.2, align 4
%add.2 = fadd fast float %2, %add.1
%arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
%3 = load float, ptr %arrayidx.3, align 4
%add.3 = fadd fast float %3, %add.2
%arrayidx.4 = getelementptr inbounds float, ptr %x, i64 4
%4 = load float, ptr %arrayidx.4, align 4
%add.4 = fadd fast float %4, %add.3
%arrayidx.5 = getelementptr inbounds float, ptr %x, i64 5
%5 = load float, ptr %arrayidx.5, align 4
%add.5 = fadd fast float %5, %add.4
%arrayidx.6 = getelementptr inbounds float, ptr %x, i64 6
%6 = load float, ptr %arrayidx.6, align 4
%add.6 = fadd fast float %6, %add.5
%arrayidx.7 = getelementptr inbounds float, ptr %x, i64 7
%7 = load float, ptr %arrayidx.7, align 4
%add.7 = fadd fast float %7, %add.6
%arrayidx.8 = getelementptr inbounds float, ptr %x, i64 8
%8 = load float, ptr %arrayidx.8, align 4
%add.8 = fadd fast float %8, %add.7
%arrayidx.9 = getelementptr inbounds float, ptr %x, i64 9
%9 = load float, ptr %arrayidx.9, align 4
%add.9 = fadd fast float %9, %add.8
%arrayidx.10 = getelementptr inbounds float, ptr %x, i64 10
%10 = load float, ptr %arrayidx.10, align 4
%add.10 = fadd fast float %10, %add.9
%arrayidx.11 = getelementptr inbounds float, ptr %x, i64 11
%11 = load float, ptr %arrayidx.11, align 4
%add.11 = fadd fast float %11, %add.10
%arrayidx.12 = getelementptr inbounds float, ptr %x, i64 12
%12 = load float, ptr %arrayidx.12, align 4
%add.12 = fadd fast float %12, %add.11
%arrayidx.13 = getelementptr inbounds float, ptr %x, i64 13
%13 = load float, ptr %arrayidx.13, align 4
%add.13 = fadd fast float %13, %add.12
%arrayidx.14 = getelementptr inbounds float, ptr %x, i64 14
%14 = load float, ptr %arrayidx.14, align 4
%add.14 = fadd fast float %14, %add.13
%arrayidx.15 = getelementptr inbounds float, ptr %x, i64 15
%15 = load float, ptr %arrayidx.15, align 4
%add.15 = fadd fast float %15, %add.14
%arrayidx.16 = getelementptr inbounds float, ptr %x, i64 16
%16 = load float, ptr %arrayidx.16, align 4
%add.16 = fadd fast float %16, %add.15
%arrayidx.17 = getelementptr inbounds float, ptr %x, i64 17
%17 = load float, ptr %arrayidx.17, align 4
%add.17 = fadd fast float %17, %add.16
%arrayidx.18 = getelementptr inbounds float, ptr %x, i64 18
%18 = load float, ptr %arrayidx.18, align 4
%add.18 = fadd fast float %18, %add.17
%arrayidx.19 = getelementptr inbounds float, ptr %x, i64 19
%19 = load float, ptr %arrayidx.19, align 4
%add.19 = fadd fast float %19, %add.18
%arrayidx.20 = getelementptr inbounds float, ptr %x, i64 20
%20 = load float, ptr %arrayidx.20, align 4
%add.20 = fadd fast float %20, %add.19
%arrayidx.21 = getelementptr inbounds float, ptr %x, i64 21
%21 = load float, ptr %arrayidx.21, align 4
%add.21 = fadd fast float %21, %add.20
%arrayidx.22 = getelementptr inbounds float, ptr %x, i64 22
%22 = load float, ptr %arrayidx.22, align 4
%add.22 = fadd fast float %22, %add.21
%arrayidx.23 = getelementptr inbounds float, ptr %x, i64 23
%23 = load float, ptr %arrayidx.23, align 4
%add.23 = fadd fast float %23, %add.22
%arrayidx.24 = getelementptr inbounds float, ptr %x, i64 24
%24 = load float, ptr %arrayidx.24, align 4
%add.24 = fadd fast float %24, %add.23
%arrayidx.25 = getelementptr inbounds float, ptr %x, i64 25
%25 = load float, ptr %arrayidx.25, align 4
%add.25 = fadd fast float %25, %add.24
%arrayidx.26 = getelementptr inbounds float, ptr %x, i64 26
%26 = load float, ptr %arrayidx.26, align 4
%add.26 = fadd fast float %26, %add.25
%arrayidx.27 = getelementptr inbounds float, ptr %x, i64 27
%27 = load float, ptr %arrayidx.27, align 4
%add.27 = fadd fast float %27, %add.26
%arrayidx.28 = getelementptr inbounds float, ptr %x, i64 28
%28 = load float, ptr %arrayidx.28, align 4
%add.28 = fadd fast float %28, %add.27
%arrayidx.29 = getelementptr inbounds float, ptr %x, i64 29
%29 = load float, ptr %arrayidx.29, align 4
%add.29 = fadd fast float %29, %add.28
%arrayidx.30 = getelementptr inbounds float, ptr %x, i64 30
%30 = load float, ptr %arrayidx.30, align 4
%add.30 = fadd fast float %30, %add.29
%arrayidx.31 = getelementptr inbounds float, ptr %x, i64 31
%31 = load float, ptr %arrayidx.31, align 4
%add.31 = fadd fast float %31, %add.30
%arrayidx.32 = getelementptr inbounds float, ptr %x, i64 32
%32 = load float, ptr %arrayidx.32, align 4
%add.32 = fadd fast float %32, %add.31
%arrayidx.33 = getelementptr inbounds float, ptr %x, i64 33
%33 = load float, ptr %arrayidx.33, align 4
%add.33 = fadd fast float %33, %add.32
%arrayidx.34 = getelementptr inbounds float, ptr %x, i64 34
%34 = load float, ptr %arrayidx.34, align 4
%add.34 = fadd fast float %34, %add.33
%arrayidx.35 = getelementptr inbounds float, ptr %x, i64 35
%35 = load float, ptr %arrayidx.35, align 4
%add.35 = fadd fast float %35, %add.34
%arrayidx.36 = getelementptr inbounds float, ptr %x, i64 36
%36 = load float, ptr %arrayidx.36, align 4
%add.36 = fadd fast float %36, %add.35
%arrayidx.37 = getelementptr inbounds float, ptr %x, i64 37
%37 = load float, ptr %arrayidx.37, align 4
%add.37 = fadd fast float %37, %add.36
%arrayidx.38 = getelementptr inbounds float, ptr %x, i64 38
%38 = load float, ptr %arrayidx.38, align 4
%add.38 = fadd fast float %38, %add.37
%arrayidx.39 = getelementptr inbounds float, ptr %x, i64 39
%39 = load float, ptr %arrayidx.39, align 4
%add.39 = fadd fast float %39, %add.38
%arrayidx.40 = getelementptr inbounds float, ptr %x, i64 40
%40 = load float, ptr %arrayidx.40, align 4
%add.40 = fadd fast float %40, %add.39
%arrayidx.41 = getelementptr inbounds float, ptr %x, i64 41
%41 = load float, ptr %arrayidx.41, align 4
%add.41 = fadd fast float %41, %add.40
%arrayidx.42 = getelementptr inbounds float, ptr %x, i64 42
%42 = load float, ptr %arrayidx.42, align 4
%add.42 = fadd fast float %42, %add.41
%arrayidx.43 = getelementptr inbounds float, ptr %x, i64 43
%43 = load float, ptr %arrayidx.43, align 4
%add.43 = fadd fast float %43, %add.42
%arrayidx.44 = getelementptr inbounds float, ptr %x, i64 44
%44 = load float, ptr %arrayidx.44, align 4
%add.44 = fadd fast float %44, %add.43
%arrayidx.45 = getelementptr inbounds float, ptr %x, i64 45
%45 = load float, ptr %arrayidx.45, align 4
%add.45 = fadd fast float %45, %add.44
%arrayidx.46 = getelementptr inbounds float, ptr %x, i64 46
%46 = load float, ptr %arrayidx.46, align 4
%add.46 = fadd fast float %46, %add.45
%arrayidx.47 = getelementptr inbounds float, ptr %x, i64 47
%47 = load float, ptr %arrayidx.47, align 4
%add.47 = fadd fast float %47, %add.46
ret float %add.47
}
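
; @f1: a 32-element fadd chain seeded with %conv (the sitofp of %a srem %b);
; the checks expect one v32f32 reduce.fadd plus a trailing scalar fadd for
; %conv.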
define float @f1(ptr nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @f1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float
; CHECK-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[CONV]]
; CHECK-NEXT: ret float [[OP_RDX]]
;
; THRESHOLD-LABEL: @f1(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[CONV]]
; THRESHOLD-NEXT: ret float [[OP_RDX]]
;
entry:
%rem = srem i32 %a, %b
%conv = sitofp i32 %rem to float
%0 = load float, ptr %x, align 4
%add = fadd fast float %0, %conv
%arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
%1 = load float, ptr %arrayidx.1, align 4
%add.1 = fadd fast float %1, %add
%arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
%2 = load float, ptr %arrayidx.2, align 4
%add.2 = fadd fast float %2, %add.1
%arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
%3 = load float, ptr %arrayidx.3, align 4
%add.3 = fadd fast float %3, %add.2
%arrayidx.4 = getelementptr inbounds float, ptr %x, i64 4
%4 = load float, ptr %arrayidx.4, align 4
%add.4 = fadd fast float %4, %add.3
%arrayidx.5 = getelementptr inbounds float, ptr %x, i64 5
%5 = load float, ptr %arrayidx.5, align 4
%add.5 = fadd fast float %5, %add.4
%arrayidx.6 = getelementptr inbounds float, ptr %x, i64 6
%6 = load float, ptr %arrayidx.6, align 4
%add.6 = fadd fast float %6, %add.5
%arrayidx.7 = getelementptr inbounds float, ptr %x, i64 7
%7 = load float, ptr %arrayidx.7, align 4
%add.7 = fadd fast float %7, %add.6
%arrayidx.8 = getelementptr inbounds float, ptr %x, i64 8
%8 = load float, ptr %arrayidx.8, align 4
%add.8 = fadd fast float %8, %add.7
%arrayidx.9 = getelementptr inbounds float, ptr %x, i64 9
%9 = load float, ptr %arrayidx.9, align 4
%add.9 = fadd fast float %9, %add.8
%arrayidx.10 = getelementptr inbounds float, ptr %x, i64 10
%10 = load float, ptr %arrayidx.10, align 4
%add.10 = fadd fast float %10, %add.9
%arrayidx.11 = getelementptr inbounds float, ptr %x, i64 11
%11 = load float, ptr %arrayidx.11, align 4
%add.11 = fadd fast float %11, %add.10
%arrayidx.12 = getelementptr inbounds float, ptr %x, i64 12
%12 = load float, ptr %arrayidx.12, align 4
%add.12 = fadd fast float %12, %add.11
%arrayidx.13 = getelementptr inbounds float, ptr %x, i64 13
%13 = load float, ptr %arrayidx.13, align 4
%add.13 = fadd fast float %13, %add.12
%arrayidx.14 = getelementptr inbounds float, ptr %x, i64 14
%14 = load float, ptr %arrayidx.14, align 4
%add.14 = fadd fast float %14, %add.13
%arrayidx.15 = getelementptr inbounds float, ptr %x, i64 15
%15 = load float, ptr %arrayidx.15, align 4
%add.15 = fadd fast float %15, %add.14
%arrayidx.16 = getelementptr inbounds float, ptr %x, i64 16
%16 = load float, ptr %arrayidx.16, align 4
%add.16 = fadd fast float %16, %add.15
%arrayidx.17 = getelementptr inbounds float, ptr %x, i64 17
%17 = load float, ptr %arrayidx.17, align 4
%add.17 = fadd fast float %17, %add.16
%arrayidx.18 = getelementptr inbounds float, ptr %x, i64 18
%18 = load float, ptr %arrayidx.18, align 4
%add.18 = fadd fast float %18, %add.17
%arrayidx.19 = getelementptr inbounds float, ptr %x, i64 19
%19 = load float, ptr %arrayidx.19, align 4
%add.19 = fadd fast float %19, %add.18
%arrayidx.20 = getelementptr inbounds float, ptr %x, i64 20
%20 = load float, ptr %arrayidx.20, align 4
%add.20 = fadd fast float %20, %add.19
%arrayidx.21 = getelementptr inbounds float, ptr %x, i64 21
%21 = load float, ptr %arrayidx.21, align 4
%add.21 = fadd fast float %21, %add.20
%arrayidx.22 = getelementptr inbounds float, ptr %x, i64 22
%22 = load float, ptr %arrayidx.22, align 4
%add.22 = fadd fast float %22, %add.21
%arrayidx.23 = getelementptr inbounds float, ptr %x, i64 23
%23 = load float, ptr %arrayidx.23, align 4
%add.23 = fadd fast float %23, %add.22
%arrayidx.24 = getelementptr inbounds float, ptr %x, i64 24
%24 = load float, ptr %arrayidx.24, align 4
%add.24 = fadd fast float %24, %add.23
%arrayidx.25 = getelementptr inbounds float, ptr %x, i64 25
%25 = load float, ptr %arrayidx.25, align 4
%add.25 = fadd fast float %25, %add.24
%arrayidx.26 = getelementptr inbounds float, ptr %x, i64 26
%26 = load float, ptr %arrayidx.26, align 4
%add.26 = fadd fast float %26, %add.25
%arrayidx.27 = getelementptr inbounds float, ptr %x, i64 27
%27 = load float, ptr %arrayidx.27, align 4
%add.27 = fadd fast float %27, %add.26
%arrayidx.28 = getelementptr inbounds float, ptr %x, i64 28
%28 = load float, ptr %arrayidx.28, align 4
%add.28 = fadd fast float %28, %add.27
%arrayidx.29 = getelementptr inbounds float, ptr %x, i64 29
%29 = load float, ptr %arrayidx.29, align 4
%add.29 = fadd fast float %29, %add.28
%arrayidx.30 = getelementptr inbounds float, ptr %x, i64 30
%30 = load float, ptr %arrayidx.30, align 4
%add.30 = fadd fast float %30, %add.29
%arrayidx.31 = getelementptr inbounds float, ptr %x, i64 31
%31 = load float, ptr %arrayidx.31, align 4
%add.31 = fadd fast float %31, %add.30
ret float %add.31
}
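
; @loadadd31: a fadd chain over the loads at offsets 1 through 30 of %x; the
; checks expect v16f32, v8f32, and v4f32 reductions plus two leftover scalar
; loads, all joined by scalar fadds.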
define float @loadadd31(ptr nocapture readonly %x) {
; CHECK-LABEL: @loadadd31(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP0]])
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]]
; CHECK-NEXT: ret float [[OP_RDX3]]
;
; THRESHOLD-LABEL: @loadadd31(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
; THRESHOLD-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
; THRESHOLD-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP0]])
; THRESHOLD-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
; THRESHOLD-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]]
; THRESHOLD-NEXT: ret float [[OP_RDX3]]
;
entry:
%arrayidx = getelementptr inbounds float, ptr %x, i64 1
%0 = load float, ptr %arrayidx, align 4
%arrayidx.1 = getelementptr inbounds float, ptr %x, i64 2
%1 = load float, ptr %arrayidx.1, align 4
%add.1 = fadd fast float %1, %0
%arrayidx.2 = getelementptr inbounds float, ptr %x, i64 3
%2 = load float, ptr %arrayidx.2, align 4
%add.2 = fadd fast float %2, %add.1
%arrayidx.3 = getelementptr inbounds float, ptr %x, i64 4
%3 = load float, ptr %arrayidx.3, align 4
%add.3 = fadd fast float %3, %add.2
%arrayidx.4 = getelementptr inbounds float, ptr %x, i64 5
%4 = load float, ptr %arrayidx.4, align 4
%add.4 = fadd fast float %4, %add.3
%arrayidx.5 = getelementptr inbounds float, ptr %x, i64 6
%5 = load float, ptr %arrayidx.5, align 4
%add.5 = fadd fast float %5, %add.4
%arrayidx.6 = getelementptr inbounds float, ptr %x, i64 7
%6 = load float, ptr %arrayidx.6, align 4
%add.6 = fadd fast float %6, %add.5
%arrayidx.7 = getelementptr inbounds float, ptr %x, i64 8
%7 = load float, ptr %arrayidx.7, align 4
%add.7 = fadd fast float %7, %add.6
%arrayidx.8 = getelementptr inbounds float, ptr %x, i64 9
%8 = load float, ptr %arrayidx.8, align 4
%add.8 = fadd fast float %8, %add.7
%arrayidx.9 = getelementptr inbounds float, ptr %x, i64 10
%9 = load float, ptr %arrayidx.9, align 4
%add.9 = fadd fast float %9, %add.8
%arrayidx.10 = getelementptr inbounds float, ptr %x, i64 11
%10 = load float, ptr %arrayidx.10, align 4
%add.10 = fadd fast float %10, %add.9
%arrayidx.11 = getelementptr inbounds float, ptr %x, i64 12
%11 = load float, ptr %arrayidx.11, align 4
%add.11 = fadd fast float %11, %add.10
%arrayidx.12 = getelementptr inbounds float, ptr %x, i64 13
%12 = load float, ptr %arrayidx.12, align 4
%add.12 = fadd fast float %12, %add.11
%arrayidx.13 = getelementptr inbounds float, ptr %x, i64 14
%13 = load float, ptr %arrayidx.13, align 4
%add.13 = fadd fast float %13, %add.12
%arrayidx.14 = getelementptr inbounds float, ptr %x, i64 15
%14 = load float, ptr %arrayidx.14, align 4
%add.14 = fadd fast float %14, %add.13
%arrayidx.15 = getelementptr inbounds float, ptr %x, i64 16
%15 = load float, ptr %arrayidx.15, align 4
%add.15 = fadd fast float %15, %add.14
%arrayidx.16 = getelementptr inbounds float, ptr %x, i64 17
%16 = load float, ptr %arrayidx.16, align 4
%add.16 = fadd fast float %16, %add.15
%arrayidx.17 = getelementptr inbounds float, ptr %x, i64 18
%17 = load float, ptr %arrayidx.17, align 4
%add.17 = fadd fast float %17, %add.16
%arrayidx.18 = getelementptr inbounds float, ptr %x, i64 19
%18 = load float, ptr %arrayidx.18, align 4
%add.18 = fadd fast float %18, %add.17
%arrayidx.19 = getelementptr inbounds float, ptr %x, i64 20
%19 = load float, ptr %arrayidx.19, align 4
%add.19 = fadd fast float %19, %add.18
%arrayidx.20 = getelementptr inbounds float, ptr %x, i64 21
%20 = load float, ptr %arrayidx.20, align 4
%add.20 = fadd fast float %20, %add.19
%arrayidx.21 = getelementptr inbounds float, ptr %x, i64 22
%21 = load float, ptr %arrayidx.21, align 4
%add.21 = fadd fast float %21, %add.20
%arrayidx.22 = getelementptr inbounds float, ptr %x, i64 23
%22 = load float, ptr %arrayidx.22, align 4
%add.22 = fadd fast float %22, %add.21
%arrayidx.23 = getelementptr inbounds float, ptr %x, i64 24
%23 = load float, ptr %arrayidx.23, align 4
%add.23 = fadd fast float %23, %add.22
%arrayidx.24 = getelementptr inbounds float, ptr %x, i64 25
%24 = load float, ptr %arrayidx.24, align 4
%add.24 = fadd fast float %24, %add.23
%arrayidx.25 = getelementptr inbounds float, ptr %x, i64 26
%25 = load float, ptr %arrayidx.25, align 4
%add.25 = fadd fast float %25, %add.24
%arrayidx.26 = getelementptr inbounds float, ptr %x, i64 27
%26 = load float, ptr %arrayidx.26, align 4
%add.26 = fadd fast float %26, %add.25
%arrayidx.27 = getelementptr inbounds float, ptr %x, i64 28
%27 = load float, ptr %arrayidx.27, align 4
%add.27 = fadd fast float %27, %add.26
%arrayidx.28 = getelementptr inbounds float, ptr %x, i64 29
%28 = load float, ptr %arrayidx.28, align 4
%add.28 = fadd fast float %28, %add.27
%arrayidx.29 = getelementptr inbounds float, ptr %x, i64 30
%29 = load float, ptr %arrayidx.29, align 4
%add.29 = fadd fast float %29, %add.28
ret float %add.29
}
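
; @extra_args: an 8-wide load reduction with %conv added twice and a 3.0
; constant; the checks expect %conv to be folded into a single fmul by 2.0 and
; the constant into one trailing fadd.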
define float @extra_args(ptr nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; CHECK-NEXT: ret float [[OP_RDX1]]
;
; THRESHOLD-LABEL: @extra_args(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; THRESHOLD-NEXT: ret float [[OP_RDX1]]
;
entry:
%mul = mul nsw i32 %b, %a
%conv = sitofp i32 %mul to float
%0 = load float, ptr %x, align 4
%add = fadd fast float %conv, 3.000000e+00
%add1 = fadd fast float %0, %add
%arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
%1 = load float, ptr %arrayidx3, align 4
%add4 = fadd fast float %1, %add1
%add5 = fadd fast float %add4, %conv
%arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
%2 = load float, ptr %arrayidx3.1, align 4
%add4.1 = fadd fast float %2, %add5
%arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
%3 = load float, ptr %arrayidx3.2, align 4
%add4.2 = fadd fast float %3, %add4.1
%arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
%4 = load float, ptr %arrayidx3.3, align 4
%add4.3 = fadd fast float %4, %add4.2
%arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
%5 = load float, ptr %arrayidx3.4, align 4
%add4.4 = fadd fast float %5, %add4.3
%arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
%6 = load float, ptr %arrayidx3.5, align 4
%add4.5 = fadd fast float %6, %add4.4
%arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
%7 = load float, ptr %arrayidx3.6, align 4
%add4.6 = fadd fast float %7, %add4.5
ret float %add4.6
}
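
; @extra_args_same_several_times: like @extra_args, but the 5.0 constant is
; added twice; the checks expect the constants folded into a single 13.0 and
; %conv again turned into one fmul by 2.0.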
define float @extra_args_same_several_times(ptr nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args_same_several_times(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], 1.300000e+01
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP2]]
; CHECK-NEXT: ret float [[OP_RDX1]]
;
; THRESHOLD-LABEL: @extra_args_same_several_times(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], 1.300000e+01
; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP2]]
; THRESHOLD-NEXT: ret float [[OP_RDX1]]
;
entry:
%mul = mul nsw i32 %b, %a
%conv = sitofp i32 %mul to float
%0 = load float, ptr %x, align 4
%add = fadd fast float %conv, 3.000000e+00
%add1 = fadd fast float %0, %add
%arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
%1 = load float, ptr %arrayidx3, align 4
%add4 = fadd fast float %1, %add1
%add41 = fadd fast float %add4, 5.000000e+00
%add5 = fadd fast float %add41, %conv
%arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
%2 = load float, ptr %arrayidx3.1, align 4
%add4.1 = fadd fast float %2, %add5
%add4.11 = fadd fast float %add4.1, 5.000000e+00
%arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
%3 = load float, ptr %arrayidx3.2, align 4
%add4.2 = fadd fast float %3, %add4.11
%arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
%4 = load float, ptr %arrayidx3.3, align 4
%add4.3 = fadd fast float %4, %add4.2
%arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
%5 = load float, ptr %arrayidx3.4, align 4
%add4.4 = fadd fast float %5, %add4.3
%arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
%6 = load float, ptr %arrayidx3.5, align 4
%add4.5 = fadd fast float %6, %add4.4
%arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
%7 = load float, ptr %arrayidx3.6, align 4
%add4.6 = fadd fast float %7, %add4.5
ret float %add4.6
}
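
; @extra_args_no_replace: an 8-wide load reduction whose extra operands are
; %conv (twice), the constant 3.0, and %convc; the checks expect the fmul by
; 2.0 plus two trailing scalar fadds.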
define float @extra_args_no_replace(ptr nocapture readonly %x, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @extra_args_no_replace(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]]
; CHECK-NEXT: ret float [[OP_RDX2]]
;
; THRESHOLD-LABEL: @extra_args_no_replace(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP0]])
; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]]
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00
; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]]
; THRESHOLD-NEXT: ret float [[OP_RDX2]]
;
entry:
%mul = mul nsw i32 %b, %a
%conv = sitofp i32 %mul to float
%0 = load float, ptr %x, align 4
%convc = sitofp i32 %c to float
%addc = fadd fast float %convc, 3.000000e+00
%add = fadd fast float %conv, %addc
%add1 = fadd fast float %0, %add
%arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
%1 = load float, ptr %arrayidx3, align 4
%add4 = fadd fast float %1, %add1
%arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
%2 = load float, ptr %arrayidx3.1, align 4
%add4.1 = fadd fast float %2, %add4
%arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
%3 = load float, ptr %arrayidx3.2, align 4
%add4.2 = fadd fast float %3, %add4.1
%arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
%4 = load float, ptr %arrayidx3.3, align 4
%add4.3 = fadd fast float %4, %add4.2
%add5 = fadd fast float %add4.3, %conv
%arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
%5 = load float, ptr %arrayidx3.4, align 4
%add4.4 = fadd fast float %5, %add5
%arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
%6 = load float, ptr %arrayidx3.5, align 4
%add4.5 = fadd fast float %6, %add4.4
%arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
%7 = load float, ptr %arrayidx3.6, align 4
%add4.6 = fadd fast float %7, %add4.5
ret float %add4.6
}
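
; @extra_args_no_fast: %add4.1 has no fast-math flags, so the chain is not
; treated as a reduction and the checks expect fully scalar code.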
define float @extra_args_no_fast(ptr %x, float %a, float %b) {
; CHECK-LABEL: @extra_args_no_fast(
; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
; CHECK-NEXT: [[T0:%.*]] = load float, ptr [[X]], align 4
; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[ARRAYIDX3_1]], align 4
; CHECK-NEXT: [[T3:%.*]] = load float, ptr [[ARRAYIDX3_2]], align 4
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
; CHECK-NEXT: [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]]
; CHECK-NEXT: ret float [[ADD5]]
;
; THRESHOLD-LABEL: @extra_args_no_fast(
; THRESHOLD-NEXT: [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00
; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
; THRESHOLD-NEXT: [[T0:%.*]] = load float, ptr [[X]], align 4
; THRESHOLD-NEXT: [[T1:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
; THRESHOLD-NEXT: [[T2:%.*]] = load float, ptr [[ARRAYIDX3_1]], align 4
; THRESHOLD-NEXT: [[T3:%.*]] = load float, ptr [[ARRAYIDX3_2]], align 4
; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]]
; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]]
; THRESHOLD-NEXT: ret float [[ADD5]]
;
%addc = fadd fast float %b, 3.0
%add = fadd fast float %a, %addc
%arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
%arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
%arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
%t0 = load float, ptr %x, align 4
%t1 = load float, ptr %arrayidx3, align 4
%t2 = load float, ptr %arrayidx3.1, align 4
%t3 = load float, ptr %arrayidx3.2, align 4
%add1 = fadd fast float %t0, %add
%add4 = fadd fast float %t1, %add1
%add4.1 = fadd float %t2, %add4 ; this is not a reduction candidate
%add4.2 = fadd fast float %t3, %add4.1
%add5 = fadd fast float %add4.2, %a
ret float %add5
}
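
; @wobble: an integer add reduction over four identical
; sext(icmp eq (xor %arg, %bar), 0) terms; the checks expect a v4i32
; xor/icmp/sext feeding reduce.add, with %x4 and %arg as extra scalar operands.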
define i32 @wobble(i32 %arg, i32 %bar) {
; CHECK-LABEL: @wobble(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[X4:%.*]] = xor i32 [[ARG]], [[BAR]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]])
; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP7]], [[X4]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[ARG]]
; CHECK-NEXT: ret i32 [[OP_RDX1]]
;
; THRESHOLD-LABEL: @wobble(
; THRESHOLD-NEXT: bb:
; THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
; THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; THRESHOLD-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
; THRESHOLD-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; THRESHOLD-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP1]], [[TMP3]]
; THRESHOLD-NEXT: [[X4:%.*]] = xor i32 [[ARG]], [[BAR]]
; THRESHOLD-NEXT: [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], zeroinitializer
; THRESHOLD-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
; THRESHOLD-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]])
; THRESHOLD-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP7]], [[X4]]
; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[ARG]]
; THRESHOLD-NEXT: ret i32 [[OP_RDX1]]
;
bb:
%x1 = xor i32 %arg, %bar
%i1 = icmp eq i32 %x1, 0
%s1 = sext i1 %i1 to i32
%x2 = xor i32 %arg, %bar
%i2 = icmp eq i32 %x2, 0
%s2 = sext i1 %i2 to i32
%x3 = xor i32 %arg, %bar
%i3 = icmp eq i32 %x3, 0
%s3 = sext i1 %i3 to i32
%x4 = xor i32 %arg, %bar
%i4 = icmp eq i32 %x4, 0
%s4 = sext i1 %i4 to i32
%r1 = add nuw i32 %arg, %s1
%r2 = add nsw i32 %r1, %s2
%r3 = add nsw i32 %r2, %s3
%r4 = add nsw i32 %r3, %s4
%r5 = add nsw i32 %r4, %x4
ret i32 %r5
}