; llvm/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s

; add0: four consecutive i32 elements, dst[i] = src[i] + C (add nsw) with
; C = 1, 1, 2, 3. All four lanes use the same opcode.
; Expected for both RUN lines: a single <4 x i32> load, add and store.
define void @add0(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @add0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 1, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %add = add nsw i32 %0, 1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %add3 = add nsw i32 %1, 1
  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %add3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %add6 = add nsw i32 %2, 2
  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %add6, ptr %incdec.ptr4, align 4
  %3 = load i32, ptr %incdec.ptr5, align 4
  %add9 = add nsw i32 %3, 3
  store i32 %add9, ptr %incdec.ptr7, align 4
  ret void
}

; add1: lane 0 is copied unchanged (load + store, no arithmetic); lanes 1-3
; compute src[i] + {1, 2, 3} with add nsw.
; Expected: lane 0 stays scalar in both configurations.
;  * With -slp-vectorize-non-power-of-2 enabled, lanes 1-3 become one
;    <3 x i32> load/add/store.
;  * With the option set to false, only lanes 1-2 are vectorized as
;    <2 x i32>; lane 3 stays scalar.
define void @add1(ptr noalias %dst, ptr noalias %src) {
; NON-POW2-LABEL: @add1(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
; NON-POW2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
; NON-POW2-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
; NON-POW2-NEXT:    store i32 [[TMP0]], ptr [[DST]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[INCDEC_PTR]], align 4
; NON-POW2-NEXT:    [[TMP2:%.*]] = add nsw <3 x i32> [[TMP1]], <i32 1, i32 2, i32 3>
; NON-POW2-NEXT:    store <3 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @add1(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
; POW2-ONLY-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
; POW2-ONLY-NEXT:    store i32 [[TMP0]], ptr [[DST]], align 4
; POW2-ONLY-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; POW2-ONLY-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR]], align 4
; POW2-ONLY-NEXT:    [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 1, i32 2>
; POW2-ONLY-NEXT:    store <2 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
; POW2-ONLY-NEXT:    [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR5]], align 4
; POW2-ONLY-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP3]], 3
; POW2-ONLY-NEXT:    store i32 [[ADD9]], ptr [[INCDEC_PTR7]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %0, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %add3 = add nsw i32 %1, 1
  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %add3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %add6 = add nsw i32 %2, 2
  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %add6, ptr %incdec.ptr4, align 4
  %3 = load i32, ptr %incdec.ptr5, align 4
  %add9 = add nsw i32 %3, 3
  store i32 %add9, ptr %incdec.ptr7, align 4
  ret void
}

; sub0: lane 0 computes src[0] + -1, lane 1 is copied unchanged, and lanes 2-3
; compute src[i] + {-2, -3}. The values are named %sub* but the opcode used
; is add nsw with negative constants.
; Expected for both RUN lines: lanes 0 and 1 stay scalar; lanes 2-3 are
; vectorized as one <2 x i32> load/add/store.
define void @sub0(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[TMP0]], -1
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store i32 [[SUB]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT:    store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %sub = add nsw i32 %0, -1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %1, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %sub5 = add nsw i32 %2, -2
  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %sub5, ptr %incdec.ptr3, align 4
  %3 = load i32, ptr %incdec.ptr4, align 4
  %sub8 = add nsw i32 %3, -3
  store i32 %sub8, ptr %incdec.ptr6, align 4
  ret void
}

; sub1: four consecutive i32 elements, dst[i] = src[i] + C (add nsw) with
; C = 4, -1, -2, -3 (one positive and three negative constants).
; Expected for both RUN lines: a single <4 x i32> load, add and store.
define void @sub1(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 4, i32 -1, i32 -2, i32 -3>
; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %add = add nsw i32 %0, 4
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %sub = add nsw i32 %1, -1
  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %sub, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %sub5 = add nsw i32 %2, -2
  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %sub5, ptr %incdec.ptr3, align 4
  %3 = load i32, ptr %incdec.ptr4, align 4
  %sub8 = add nsw i32 %3, -3
  store i32 %sub8, ptr %incdec.ptr6, align 4
  ret void
}

; sub2: four consecutive i32 elements, dst[i] = src[i] + C (add nsw) with
; C = -1, -1, -2, -3 (all constants negative).
; Expected for both RUN lines: a single <4 x i32> load, add and store.
define void @sub2(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -2, i32 -3>
; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %sub = add nsw i32 %0, -1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %sub3 = add nsw i32 %1, -1
  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %sub3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %sub6 = add nsw i32 %2, -2
  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %sub6, ptr %incdec.ptr4, align 4
  %3 = load i32, ptr %incdec.ptr5, align 4
  %sub9 = add nsw i32 %3, -3
  store i32 %sub9, ptr %incdec.ptr7, align 4
  ret void
}

; addsub0: mixed add/sub lanes with one plain copy.
;   lane 0: src[0] + -1 (add nsw)
;   lane 1: copied unchanged
;   lane 2: src[2] + -2 (add nsw)
;   lane 3: src[3] - -3 (sub nsw)
; Expected for both RUN lines: lanes 0 and 1 stay scalar. Lanes 2-3 are loaded
; as <2 x i32>, fed to both a vector add and a vector sub, and the two results
; are blended with a shufflevector (mask <0, 3>: element 0 from the add,
; element 1 from the sub) before one <2 x i32> store.
define void @addsub0(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[TMP0]], -1
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store i32 [[SUB]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT:    store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %sub = add nsw i32 %0, -1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %1, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %sub5 = add nsw i32 %2, -2
  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %sub5, ptr %incdec.ptr3, align 4
  %3 = load i32, ptr %incdec.ptr4, align 4
  %sub8 = sub nsw i32 %3, -3
  store i32 %sub8, ptr %incdec.ptr6, align 4
  ret void
}

; addsub1: mixed add/sub lanes with one plain copy.
;   lane 0: src[0] + -1 (add nsw)
;   lane 1: src[1] - -1 (sub nsw)
;   lane 2: copied unchanged
;   lane 3: src[3] - -3 (sub nsw)
; Expected for both RUN lines: lanes 0-1 are loaded as <2 x i32>, fed to both
; a vector add and a vector sub, and blended with a shufflevector
; (mask <0, 3>) before one <2 x i32> store. Lanes 2 and 3 stay scalar.
define void @addsub1(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], <i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], <i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT:    store i32 [[TMP4]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    [[SUB8:%.*]] = sub nsw i32 [[TMP5]], -3
; CHECK-NEXT:    store i32 [[SUB8]], ptr [[INCDEC_PTR6]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %sub = add nsw i32 %0, -1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %sub1 = sub nsw i32 %1, -1
  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %sub1, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %2, ptr %incdec.ptr3, align 4
  %3 = load i32, ptr %incdec.ptr4, align 4
  %sub8 = sub nsw i32 %3, -3
  store i32 %sub8, ptr %incdec.ptr6, align 4
  ret void
}

; mul: integer multiply lanes with one plain copy.
;   lane 0: src[0] * 257 (mul nsw)
;   lane 1: src[1] * -3  (mul nsw)
;   lane 2: copied unchanged
;   lane 3: src[3] * -9  (mul nsw)
; Expected for both RUN lines: lanes 0-1 are vectorized as one <2 x i32>
; load/mul/store; lanes 2 and 3 stay scalar.
define void @mul(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @mul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], <i32 257, i32 -3>
; CHECK-NEXT:    store <2 x i32> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT:    store i32 [[TMP2]], ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR5]], align 4
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[TMP3]], -9
; CHECK-NEXT:    store i32 [[MUL9]], ptr [[INCDEC_PTR7]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %mul = mul nsw i32 %0, 257
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %mul, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %mul3 = mul nsw i32 %1, -3
  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %mul3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %2, ptr %incdec.ptr4, align 4
  %3 = load i32, ptr %incdec.ptr5, align 4
  %mul9 = mul nsw i32 %3, -9
  store i32 %mul9, ptr %incdec.ptr7, align 4
  ret void
}

; shl0: lane 0 is copied unchanged (load + store, no shift); lanes 1-3 compute
; src[i] << {1, 2, 3}.
; Expected: lane 0 stays scalar in both configurations.
;  * With -slp-vectorize-non-power-of-2 enabled, lanes 1-3 become one
;    <3 x i32> load/shl/store.
;  * With the option set to false, only lanes 1-2 are vectorized as
;    <2 x i32>; lane 3 stays scalar.
define void @shl0(ptr noalias %dst, ptr noalias %src) {
; NON-POW2-LABEL: @shl0(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
; NON-POW2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
; NON-POW2-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
; NON-POW2-NEXT:    store i32 [[TMP0]], ptr [[DST]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[INCDEC_PTR]], align 4
; NON-POW2-NEXT:    [[TMP2:%.*]] = shl <3 x i32> [[TMP1]], <i32 1, i32 2, i32 3>
; NON-POW2-NEXT:    store <3 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @shl0(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
; POW2-ONLY-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
; POW2-ONLY-NEXT:    store i32 [[TMP0]], ptr [[DST]], align 4
; POW2-ONLY-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; POW2-ONLY-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR]], align 4
; POW2-ONLY-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 2>
; POW2-ONLY-NEXT:    store <2 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
; POW2-ONLY-NEXT:    [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR4]], align 4
; POW2-ONLY-NEXT:    [[SHL8:%.*]] = shl i32 [[TMP3]], 3
; POW2-ONLY-NEXT:    store i32 [[SHL8]], ptr [[INCDEC_PTR6]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %0, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %shl = shl i32 %1, 1
  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %shl, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %shl5 = shl i32 %2, 2
  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %shl5, ptr %incdec.ptr3, align 4
  %3 = load i32, ptr %incdec.ptr4, align 4
  %shl8 = shl i32 %3, 3
  store i32 %shl8, ptr %incdec.ptr6, align 4
  ret void
}

; shl1: four consecutive i32 elements, dst[i] = src[i] << C with
; C = 7, 1, 2, 3. All four lanes use the same opcode.
; Expected for both RUN lines: a single <4 x i32> load, shl and store.
define void @shl1(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @shl1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[TMP0]], <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
  %0 = load i32, ptr %src, align 4
  %shl = shl i32 %0, 7
  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %shl, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
  %1 = load i32, ptr %incdec.ptr, align 4
  %shl3 = shl i32 %1, 1
  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %shl3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
  %2 = load i32, ptr %incdec.ptr2, align 4
  %shl6 = shl i32 %2, 2
  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %shl6, ptr %incdec.ptr4, align 4
  %3 = load i32, ptr %incdec.ptr5, align 4
  %shl9 = shl i32 %3, 3
  store i32 %shl9, ptr %incdec.ptr7, align 4
  ret void
}

; add0f: floating-point counterpart of @add0. Four consecutive float elements,
; dst[i] = src[i] + C (fadd fast) with C = 1.0, 1.0, 2.0, 3.0.
; Expected for both RUN lines: a single <4 x float> load, fadd fast and store.
define void @add0f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @add0f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <4 x float> [[TMP0]], <float 1.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %add = fadd fast float %0, 1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %add3 = fadd fast float %1, 1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %add3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd fast float %2, 2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd fast float %3, 3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}

; add1f: floating-point counterpart of @add1. Lane 0 is copied unchanged;
; lanes 1-3 compute src[i] + {1.0, 2.0, 3.0} with fadd fast.
; Expected: lane 0 stays scalar in both configurations.
;  * With -slp-vectorize-non-power-of-2 enabled, lanes 1-3 become one
;    <3 x float> load/fadd/store.
;  * With the option set to false, only lanes 1-2 are vectorized as
;    <2 x float>; lane 3 stays scalar.
define void @add1f(ptr noalias %dst, ptr noalias %src) {
; NON-POW2-LABEL: @add1f(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; NON-POW2-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; NON-POW2-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; NON-POW2-NEXT:    store float [[TMP0]], ptr [[DST]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x float>, ptr [[INCDEC_PTR]], align 4
; NON-POW2-NEXT:    [[TMP2:%.*]] = fadd fast <3 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; NON-POW2-NEXT:    store <3 x float> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @add1f(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; POW2-ONLY-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; POW2-ONLY-NEXT:    store float [[TMP0]], ptr [[DST]], align 4
; POW2-ONLY-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
; POW2-ONLY-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[INCDEC_PTR]], align 4
; POW2-ONLY-NEXT:    [[TMP2:%.*]] = fadd fast <2 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00>
; POW2-ONLY-NEXT:    store <2 x float> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
; POW2-ONLY-NEXT:    [[TMP3:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
; POW2-ONLY-NEXT:    [[ADD9:%.*]] = fadd fast float [[TMP3]], 3.000000e+00
; POW2-ONLY-NEXT:    store float [[ADD9]], ptr [[INCDEC_PTR7]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %0, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %add3 = fadd fast float %1, 1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %add3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd fast float %2, 2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd fast float %3, 3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}

; sub0f: floating-point counterpart of @sub0. Lane 0 computes src[0] + -1.0,
; lane 1 is copied unchanged, and lanes 2-3 compute src[i] + {-2.0, -3.0}
; (all fadd fast with negative constants).
; Expected for both RUN lines: lanes 0 and 1 stay scalar; lanes 2-3 are
; vectorized as one <2 x float> load/fadd/store.
define void @sub0f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub0f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store float [[ADD]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
; CHECK-NEXT:    store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <2 x float> [[TMP2]], <float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %add = fadd fast float %0, -1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %1, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd fast float %2, -2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd fast float %3, -3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}

; sub1f: floating-point counterpart of @sub1. Four consecutive float elements,
; dst[i] = src[i] + C (fadd fast) with C = 4.0, -1.0, -2.0, -3.0.
; Expected for both RUN lines: a single <4 x float> load, fadd fast and store.
define void @sub1f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub1f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <4 x float> [[TMP0]], <float 4.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %add = fadd fast float %0, 4.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %add, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub = fadd fast float %1, -1.000000e+00
  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %sub5 = fadd fast float %2, -2.000000e+00
  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
  store float %sub5, ptr %incdec.ptr3, align 4
  %3 = load float, ptr %incdec.ptr4, align 4
  %sub8 = fadd fast float %3, -3.000000e+00
  store float %sub8, ptr %incdec.ptr6, align 4
  ret void
}

; sub2f: floating-point counterpart of @sub2. Four consecutive float elements,
; dst[i] = src[i] + C (fadd fast) with C = -1.0, -1.0, -2.0, -3.0.
; Expected for both RUN lines: a single <4 x float> load, fadd fast and store.
define void @sub2f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub2f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <4 x float> [[TMP0]], <float -1.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %sub = fadd fast float %0, -1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub3 = fadd fast float %1, -1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %sub6 = fadd fast float %2, -2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %sub6, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %sub9 = fadd fast float %3, -3.000000e+00
  store float %sub9, ptr %incdec.ptr7, align 4
  ret void
}

; addsub0f: floating-point counterpart of @addsub0.
;   lane 0: src[0] + -1.0 (fadd fast)
;   lane 1: copied unchanged
;   lane 2: src[2] + -2.0 (fadd fast)
;   lane 3: src[3] - -3.0 (fsub fast)
; Expected for both RUN lines: lanes 0 and 1 stay scalar. Lanes 2-3 are loaded
; as <2 x float>, fed to both a vector fadd and a vector fsub, and blended
; with a shufflevector (mask <0, 3>: element 0 from the fadd, element 1 from
; the fsub) before one <2 x float> store.
define void @addsub0f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub0f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store float [[SUB]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
; CHECK-NEXT:    store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <2 x float> [[TMP2]], <float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x float> [[TMP2]], <float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %sub = fadd fast float %0, -1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
  store float %1, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %sub5 = fadd fast float %2, -2.000000e+00
  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
  store float %sub5, ptr %incdec.ptr3, align 4
  %3 = load float, ptr %incdec.ptr4, align 4
  %sub8 = fsub fast float %3, -3.000000e+00
  store float %sub8, ptr %incdec.ptr6, align 4
  ret void
}

; addsub1f: floating-point counterpart of @addsub1.
;   lane 0: src[0] + -1.0 (fadd fast)
;   lane 1: src[1] - -1.0 (fsub fast)
;   lane 2: copied unchanged
;   lane 3: src[3] - -3.0 (fsub fast)
; Expected for both RUN lines: lanes 0-1 are loaded as <2 x float>, fed to
; both a vector fadd and a vector fsub, and blended with a shufflevector
; (mask <0, 3>) before one <2 x float> store. Lanes 2 and 3 stay scalar.
define void @addsub1f(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub1f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[SRC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <2 x float> [[TMP0]], <float -1.000000e+00, float -1.000000e+00>
; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast <2 x float> [[TMP0]], <float -1.000000e+00, float -1.000000e+00>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
; CHECK-NEXT:    store float [[TMP4]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    [[SUB8:%.*]] = fsub fast float [[TMP5]], -3.000000e+00
; CHECK-NEXT:    store float [[SUB8]], ptr [[INCDEC_PTR6]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %sub = fadd fast float %0, -1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub1 = fsub fast float %1, -1.000000e+00
  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub1, ptr %incdec.ptr1, align 4
  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
  store float %2, ptr %incdec.ptr3, align 4
  %3 = load float, ptr %incdec.ptr4, align 4
  %sub8 = fsub fast float %3, -3.000000e+00
  store float %sub8, ptr %incdec.ptr6, align 4
  ret void
}

; mulf: floating-point counterpart of @mul. The values are named %sub* but
; the opcode used is fmul fast.
;   lane 0: src[0] * 257.0 (fmul fast)
;   lane 1: src[1] * -3.0  (fmul fast)
;   lane 2: copied unchanged
;   lane 3: src[3] * -9.0  (fmul fast)
; Expected for both RUN lines: lanes 0-1 are vectorized as one <2 x float>
; load/fmul/store; lanes 2 and 3 stay scalar.
define void @mulf(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @mulf(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[SRC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <2 x float> [[TMP0]], <float 2.570000e+02, float -3.000000e+00>
; CHECK-NEXT:    store <2 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
; CHECK-NEXT:    store float [[TMP2]], ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
; CHECK-NEXT:    [[SUB9:%.*]] = fmul fast float [[TMP3]], -9.000000e+00
; CHECK-NEXT:    store float [[SUB9]], ptr [[INCDEC_PTR7]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %sub = fmul fast float %0, 2.570000e+02
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %sub, ptr %dst, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub3 = fmul fast float %1, -3.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub3, ptr %incdec.ptr1, align 4
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %2, ptr %incdec.ptr4, align 4
  %3 = load float, ptr %incdec.ptr5, align 4
  %sub9 = fmul fast float %3, -9.000000e+00
  store float %sub9, ptr %incdec.ptr7, align 4
  ret void
}

; @add0fn: reads four consecutive floats from %src, adds a per-lane constant
; (1.0, 1.0, 2.0, 3.0) with a plain `fadd` carrying no fast-math flags, and
; writes the results to %dst. The assertions below expect the whole function
; to collapse into one <4 x float> load, one <4 x float> fadd (also without
; fast-math flags) and one <4 x float> store.
define void @add0fn(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @add0fn(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[TMP0]], <float 1.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Lane 0: dst[0] = src[0] + 1.0.
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %add = fadd float %0, 1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %add, ptr %dst, align 4
  ; Lane 1: dst[1] = src[1] + 1.0.
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %add3 = fadd float %1, 1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %add3, ptr %incdec.ptr1, align 4
  ; Lane 2: dst[2] = src[2] + 2.0.
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd float %2, 2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  ; Lane 3: dst[3] = src[3] + 3.0.
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd float %3, 3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}

; @add1fn: reads four consecutive floats from %src and writes four to %dst.
; Lane 0 is copied unchanged; lanes 1-3 each get a plain `fadd` (no
; fast-math flags) with constants 1.0, 2.0 and 3.0.
;
; The two assertion groups below correspond to the two invocations at the
; top of the file, which differ in whether non-power-of-2 vector widths are
; allowed:
;  * allowed:     lane 0 stays scalar and lanes 1-3 become one
;                 <3 x float> load/fadd/store.
;  * not allowed: lane 0 stays scalar, lanes 1-2 become a <2 x float>
;                 load/fadd/store, and lane 3 stays a scalar fadd.
define void @add1fn(ptr noalias %dst, ptr noalias %src) {
; NON-POW2-LABEL: @add1fn(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; NON-POW2-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; NON-POW2-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; NON-POW2-NEXT:    store float [[TMP0]], ptr [[DST]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x float>, ptr [[INCDEC_PTR]], align 4
; NON-POW2-NEXT:    [[TMP2:%.*]] = fadd <3 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; NON-POW2-NEXT:    store <3 x float> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @add1fn(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; POW2-ONLY-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; POW2-ONLY-NEXT:    store float [[TMP0]], ptr [[DST]], align 4
; POW2-ONLY-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
; POW2-ONLY-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[INCDEC_PTR]], align 4
; POW2-ONLY-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00>
; POW2-ONLY-NEXT:    store <2 x float> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
; POW2-ONLY-NEXT:    [[TMP3:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
; POW2-ONLY-NEXT:    [[ADD9:%.*]] = fadd float [[TMP3]], 3.000000e+00
; POW2-ONLY-NEXT:    store float [[ADD9]], ptr [[INCDEC_PTR7]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  ; Lane 0: dst[0] = src[0], a plain copy with no arithmetic.
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %0, ptr %dst, align 4
  ; Lane 1: dst[1] = src[1] + 1.0.
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %add3 = fadd float %1, 1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %add3, ptr %incdec.ptr1, align 4
  ; Lane 2: dst[2] = src[2] + 2.0.
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd float %2, 2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  ; Lane 3: dst[3] = src[3] + 3.0.
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd float %3, 3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}

; @sub0fn: reads four consecutive floats from %src and writes four to %dst.
; Lane 0 uses `fadd fast` with -1.0, lane 1 is copied unchanged, and lanes
; 2-3 use a plain `fadd` (no fast-math flags) with -2.0 and -3.0. Although
; the function name says "sub", the IR expresses each step as an fadd of a
; negative constant.
; The assertions below expect lanes 0 and 1 to stay scalar (lane 0 keeping
; its `fast` flag) and only lanes 2-3 to be combined into a <2 x float>
; fadd without fast-math flags.
define void @sub0fn(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub0fn(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
; CHECK-NEXT:    store float [[ADD]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
; CHECK-NEXT:    store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP2]], <float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Lane 0: dst[0] = src[0] + -1.0; the only lane carrying the `fast` flag.
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %add = fadd fast float %0, -1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %add, ptr %dst, align 4
  ; Lane 1: dst[1] = src[1], a plain copy with no arithmetic.
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %1, ptr %incdec.ptr1, align 4
  ; Lane 2: dst[2] = src[2] + -2.0 (no fast-math flags).
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %add6 = fadd float %2, -2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %add6, ptr %incdec.ptr4, align 4
  ; Lane 3: dst[3] = src[3] + -3.0 (no fast-math flags).
  %3 = load float, ptr %incdec.ptr5, align 4
  %add9 = fadd float %3, -3.000000e+00
  store float %add9, ptr %incdec.ptr7, align 4
  ret void
}

; @sub1fn: reads four consecutive floats from %src, applies a plain `fadd`
; (no fast-math flags) with per-lane constants 4.0, -1.0, -2.0 and -3.0, and
; writes the results to %dst. Although the function and most value names say
; "sub", every operation in the IR is an fadd; lanes 1-3 simply add negative
; constants.
; The assertions below expect a single <4 x float> load, fadd and store.
define void @sub1fn(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub1fn(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[TMP0]], <float 4.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Lane 0: dst[0] = src[0] + 4.0 (the only positive constant).
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %add = fadd float %0, 4.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %add, ptr %dst, align 4
  ; Lane 1: dst[1] = src[1] + -1.0.
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub = fadd float %1, -1.000000e+00
  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub, ptr %incdec.ptr1, align 4
  ; Lane 2: dst[2] = src[2] + -2.0.
  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %sub5 = fadd float %2, -2.000000e+00
  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
  store float %sub5, ptr %incdec.ptr3, align 4
  ; Lane 3: dst[3] = src[3] + -3.0.
  %3 = load float, ptr %incdec.ptr4, align 4
  %sub8 = fadd float %3, -3.000000e+00
  store float %sub8, ptr %incdec.ptr6, align 4
  ret void
}

; @sub2fn: reads four consecutive floats from %src, applies a plain `fadd`
; (no fast-math flags) with per-lane constants -1.0, -1.0, -2.0 and -3.0,
; and writes the results to %dst. Although the function and value names say
; "sub", every operation in the IR is an fadd of a negative constant.
; The assertions below expect a single <4 x float> load, fadd and store.
define void @sub2fn(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub2fn(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[TMP0]], <float -1.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Lane 0: dst[0] = src[0] + -1.0.
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %sub = fadd float %0, -1.000000e+00
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %sub, ptr %dst, align 4
  ; Lane 1: dst[1] = src[1] + -1.0.
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub3 = fadd float %1, -1.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub3, ptr %incdec.ptr1, align 4
  ; Lane 2: dst[2] = src[2] + -2.0.
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %sub6 = fadd float %2, -2.000000e+00
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %sub6, ptr %incdec.ptr4, align 4
  ; Lane 3: dst[3] = src[3] + -3.0.
  %3 = load float, ptr %incdec.ptr5, align 4
  %sub9 = fadd float %3, -3.000000e+00
  store float %sub9, ptr %incdec.ptr7, align 4
  ret void
}

; @mulfn: reads four consecutive floats from %src and writes four to %dst.
; Lanes 0-1 use a plain `fmul` (no fast-math flags) with 257.0 and -3.0,
; lane 2 is copied unchanged, and lane 3 uses `fmul fast` with -9.0.
; Despite the %sub* value names, every arithmetic operation here is an fmul.
; The assertions below expect lanes 0-1 to be combined into a <2 x float>
; fmul without fast-math flags, while lanes 2 and 3 stay scalar and lane 3
; keeps its `fast` flag.
define void @mulfn(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @mulfn(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[SRC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[TMP0]], <float 2.570000e+02, float -3.000000e+00>
; CHECK-NEXT:    store <2 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
; CHECK-NEXT:    store float [[TMP2]], ptr [[INCDEC_PTR4]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
; CHECK-NEXT:    [[SUB9:%.*]] = fmul fast float [[TMP3]], -9.000000e+00
; CHECK-NEXT:    store float [[SUB9]], ptr [[INCDEC_PTR7]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Lane 0: dst[0] = src[0] * 257.0 (no fast-math flags).
  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
  %0 = load float, ptr %src, align 4
  %sub = fmul float %0, 2.570000e+02
  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %sub, ptr %dst, align 4
  ; Lane 1: dst[1] = src[1] * -3.0 (no fast-math flags).
  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
  %1 = load float, ptr %incdec.ptr, align 4
  %sub3 = fmul float %1, -3.000000e+00
  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
  store float %sub3, ptr %incdec.ptr1, align 4
  ; Lane 2: dst[2] = src[2], a plain copy with no arithmetic.
  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
  %2 = load float, ptr %incdec.ptr2, align 4
  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
  store float %2, ptr %incdec.ptr4, align 4
  ; Lane 3: dst[3] = src[3] * -9.0; the only lane carrying the `fast` flag.
  %3 = load float, ptr %incdec.ptr5, align 4
  %sub9 = fmul fast float %3, -9.000000e+00
  store float %sub9, ptr %incdec.ptr7, align 4
  ret void
}