; llvm/llvm/test/CodeGen/ARM/ParallelDSP/aliasing.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=armv8m.main-none-none-eabi -mattr=+dsp < %s -arm-parallel-dsp -verify -S | FileCheck %s
;
; Alias check: check that the rewrite isn't triggered when there's a store
; instruction possibly aliasing any mul load operands; arguments are passed
; without 'restrict' enabled.
;
define dso_local i32 @no_restrict(i32 %arg, ptr nocapture %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LABEL: @no_restrict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2
; CHECK-NEXT:    [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[MAC1_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    store i16 42, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_025]], 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]]
; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]]
; CHECK-NEXT:    [[ADD11]] = add i32 [[MUL9]], [[ADD10]]
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
  %cmp24 = icmp sgt i32 %arg, 0
  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  %.pre = load i16, ptr %arg3, align 2
  %.pre27 = load i16, ptr %arg2, align 2
  br label %for.body

for.cond.cleanup:
  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
  ret i32 %mac1.0.lcssa

for.body:
  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
  %0 = load i16, ptr %arrayidx, align 2

; Store inserted here, aliasing with arrayidx, arrayidx1, arrayidx3
  store i16 42, ptr %arrayidx, align 2

  %add = add nuw nsw i32 %i.025, 1
  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
  %1 = load i16, ptr %arrayidx1, align 2
  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
  %2 = load i16, ptr %arrayidx3, align 2
  %conv = sext i16 %2 to i32
  %conv4 = sext i16 %0 to i32
  %mul = mul nsw i32 %conv, %conv4
  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  %3 = load i16, ptr %arrayidx6, align 2
  %conv7 = sext i16 %3 to i32
  %conv8 = sext i16 %1 to i32
  %mul9 = mul nsw i32 %conv7, %conv8
  %add10 = add i32 %mul, %mac1.026
  %add11 = add i32 %mul9, %add10
  %exitcond = icmp ne i32 %add, %arg
  br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

; Alias check: check that the rewrite isn't triggered when there's a store
; aliasing one of the mul load operands. Arguments are now annotated with
; 'noalias'.
;
define dso_local i32 @restrict(i32 %arg, ptr noalias %arg1, ptr noalias readonly %arg2, ptr noalias %arg3) {
; CHECK-LABEL: @restrict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2
; CHECK-NEXT:    [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[MAC1_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    store i16 42, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_025]], 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]]
; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]]
; CHECK-NEXT:    [[ADD11]] = add i32 [[MUL9]], [[ADD10]]
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
  %cmp24 = icmp sgt i32 %arg, 0
  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  %.pre = load i16, ptr %arg3, align 2
  %.pre27 = load i16, ptr %arg2, align 2
  br label %for.body

for.cond.cleanup:
  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
  ret i32 %mac1.0.lcssa

for.body:
  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
  %0 = load i16, ptr %arrayidx, align 2

; Store inserted here, aliasing only with loads from 'arrayidx'.
  store i16 42, ptr %arrayidx, align 2

  %add = add nuw nsw i32 %i.025, 1
  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
  %1 = load i16, ptr %arrayidx1, align 2
  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
  %2 = load i16, ptr %arrayidx3, align 2
  %conv = sext i16 %2 to i32
  %conv4 = sext i16 %0 to i32
  %mul = mul nsw i32 %conv, %conv4
  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  %3 = load i16, ptr %arrayidx6, align 2
  %conv7 = sext i16 %3 to i32
  %conv8 = sext i16 %1 to i32
  %mul9 = mul nsw i32 %conv7, %conv8
  %add10 = add i32 %mul, %mac1.026

; Here the Mul is the LHS, and the Add the RHS.
  %add11 = add i32 %mul9, %add10

  %exitcond = icmp ne i32 %add, %arg
  br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

define dso_local i32 @store_dominates_all(i32 %arg, ptr nocapture %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LABEL: @store_dominates_all(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2
; CHECK-NEXT:    [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[MAC1_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[MAC1_026:%.*]] = phi i32 [ [[TMP13]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]]
; CHECK-NEXT:    store i16 42, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP6]] to i32
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_025]], 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16
; CHECK-NEXT:    [[TMP13]] = call i32 @llvm.arm.smlad(i32 [[TMP11]], i32 [[TMP2]], i32 [[MAC1_026]])
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP12]] to i32
; CHECK-NEXT:    [[TMP15:%.*]] = lshr i32 [[TMP11]], 16
; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16
; CHECK-NEXT:    [[TMP17:%.*]] = sext i16 [[TMP16]] to i32
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP14]], [[TMP4]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP18]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[TMP17]], [[TMP7]]
; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]]
; CHECK-NEXT:    [[ADD11:%.*]] = add i32 [[MUL9]], [[ADD10]]
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
  %cmp24 = icmp sgt i32 %arg, 0
  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  %.pre = load i16, ptr %arg3, align 2
  %.pre27 = load i16, ptr %arg2, align 2
  br label %for.body

for.cond.cleanup:
  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
  ret i32 %mac1.0.lcssa

for.body:
  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
  store i16 42, ptr %arrayidx, align 2
  %0 = load i16, ptr %arrayidx, align 2
  %add = add nuw nsw i32 %i.025, 1
  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
  %1 = load i16, ptr %arrayidx1, align 2
  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
  %2 = load i16, ptr %arrayidx3, align 2
  %conv = sext i16 %2 to i32
  %conv4 = sext i16 %0 to i32
  %mul = mul nsw i32 %conv, %conv4
  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  %3 = load i16, ptr %arrayidx6, align 2
  %conv7 = sext i16 %3 to i32
  %conv8 = sext i16 %1 to i32
  %mul9 = mul nsw i32 %conv7, %conv8
  %add10 = add i32 %mul, %mac1.026
  %add11 = add i32 %mul9, %add10
  %exitcond = icmp ne i32 %add, %arg
  br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

define dso_local i32 @loads_dominate(i32 %arg, ptr nocapture %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LABEL: @loads_dominate(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2
; CHECK-NEXT:    [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[MAC1_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[MAC1_026:%.*]] = phi i32 [ [[TMP13]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP6]] to i32
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_025]], 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16
; CHECK-NEXT:    [[TMP13]] = call i32 @llvm.arm.smlad(i32 [[TMP11]], i32 [[TMP2]], i32 [[MAC1_026]])
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP12]] to i32
; CHECK-NEXT:    [[TMP15:%.*]] = lshr i32 [[TMP11]], 16
; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16
; CHECK-NEXT:    [[TMP17:%.*]] = sext i16 [[TMP16]] to i32
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP14]], [[TMP4]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP18]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[TMP17]], [[TMP7]]
; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]]
; CHECK-NEXT:    [[ADD11:%.*]] = add i32 [[MUL9]], [[ADD10]]
; CHECK-NEXT:    store i16 42, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
  %cmp24 = icmp sgt i32 %arg, 0
  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  %.pre = load i16, ptr %arg3, align 2
  %.pre27 = load i16, ptr %arg2, align 2
  br label %for.body

for.cond.cleanup:
  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
  ret i32 %mac1.0.lcssa

for.body:
  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
  %0 = load i16, ptr %arrayidx, align 2
  %add = add nuw nsw i32 %i.025, 1
  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
  %1 = load i16, ptr %arrayidx1, align 2
  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
  %2 = load i16, ptr %arrayidx3, align 2
  %conv = sext i16 %2 to i32
  %conv4 = sext i16 %0 to i32
  %mul = mul nsw i32 %conv, %conv4
  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  %3 = load i16, ptr %arrayidx6, align 2
  %conv7 = sext i16 %3 to i32
  %conv8 = sext i16 %1 to i32
  %mul9 = mul nsw i32 %conv7, %conv8
  %add10 = add i32 %mul, %mac1.026
  %add11 = add i32 %mul9, %add10
  store i16 42, ptr %arrayidx, align 2
  %exitcond = icmp ne i32 %add, %arg
  br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

define dso_local i32 @store_alias_arg3_legal_1(i32 %arg, ptr nocapture %arg1, ptr noalias nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LABEL: @store_alias_arg3_legal_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2
; CHECK-NEXT:    [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[MAC1_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[MAC1_026:%.*]] = phi i32 [ [[TMP13]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP6]] to i32
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_025]], 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16
; CHECK-NEXT:    [[TMP13]] = call i32 @llvm.arm.smlad(i32 [[TMP11]], i32 [[TMP2]], i32 [[MAC1_026]])
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP12]] to i32
; CHECK-NEXT:    [[TMP15:%.*]] = lshr i32 [[TMP11]], 16
; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16
; CHECK-NEXT:    [[TMP17:%.*]] = sext i16 [[TMP16]] to i32
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP14]], [[TMP4]]
; CHECK-NEXT:    store i16 42, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP18]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[TMP17]], [[TMP7]]
; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]]
; CHECK-NEXT:    [[ADD11:%.*]] = add i32 [[MUL9]], [[ADD10]]
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
  %cmp24 = icmp sgt i32 %arg, 0
  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  %.pre = load i16, ptr %arg3, align 2
  %.pre27 = load i16, ptr %arg2, align 2
  br label %for.body

for.cond.cleanup:
  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
  ret i32 %mac1.0.lcssa

for.body:
  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
  %0 = load i16, ptr %arrayidx, align 2
  %add = add nuw nsw i32 %i.025, 1
  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
  %1 = load i16, ptr %arrayidx1, align 2
  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
  %2 = load i16, ptr %arrayidx3, align 2
  %conv = sext i16 %2 to i32
  %conv4 = sext i16 %0 to i32
  %mul = mul nsw i32 %conv, %conv4
  store i16 42, ptr %arrayidx, align 2
  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  %3 = load i16, ptr %arrayidx6, align 2
  %conv7 = sext i16 %3 to i32
  %conv8 = sext i16 %1 to i32
  %mul9 = mul nsw i32 %conv7, %conv8
  %add10 = add i32 %mul, %mac1.026
  %add11 = add i32 %mul9, %add10
  %exitcond = icmp ne i32 %add, %arg
  br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

define dso_local i32 @store_alias_arg3_legal_2(i32 %arg, ptr nocapture %arg1, ptr noalias nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LABEL: @store_alias_arg3_legal_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2
; CHECK-NEXT:    [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[MAC1_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[MAC1_026:%.*]] = phi i32 [ [[TMP13]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP6]] to i32
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_025]], 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]]
; CHECK-NEXT:    store i16 42, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16
; CHECK-NEXT:    [[TMP13]] = call i32 @llvm.arm.smlad(i32 [[TMP11]], i32 [[TMP2]], i32 [[MAC1_026]])
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP12]] to i32
; CHECK-NEXT:    [[TMP15:%.*]] = lshr i32 [[TMP11]], 16
; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16
; CHECK-NEXT:    [[TMP17:%.*]] = sext i16 [[TMP16]] to i32
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP14]], [[TMP4]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP18]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[TMP17]], [[TMP7]]
; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]]
; CHECK-NEXT:    [[ADD11:%.*]] = add i32 [[MUL9]], [[ADD10]]
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
  %cmp24 = icmp sgt i32 %arg, 0
  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  %.pre = load i16, ptr %arg3, align 2
  %.pre27 = load i16, ptr %arg2, align 2
  br label %for.body

for.cond.cleanup:
  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
  ret i32 %mac1.0.lcssa

for.body:
  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
  %0 = load i16, ptr %arrayidx, align 2
  %add = add nuw nsw i32 %i.025, 1
  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
  %1 = load i16, ptr %arrayidx1, align 2
  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
  store i16 42, ptr %arrayidx, align 2
  %2 = load i16, ptr %arrayidx3, align 2
  %conv = sext i16 %2 to i32
  %conv4 = sext i16 %0 to i32
  %mul = mul nsw i32 %conv, %conv4
  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  %3 = load i16, ptr %arrayidx6, align 2
  %conv7 = sext i16 %3 to i32
  %conv8 = sext i16 %1 to i32
  %mul9 = mul nsw i32 %conv7, %conv8
  %add10 = add i32 %mul, %mac1.026
  %add11 = add i32 %mul9, %add10
  %exitcond = icmp ne i32 %add, %arg
  br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

define dso_local i32 @store_alias_arg3_illegal_1(i32 %arg, ptr nocapture %arg1, ptr noalias nocapture readonly %arg2, ptr noalias nocapture %arg3) {
; CHECK-LABEL: @store_alias_arg3_illegal_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2
; CHECK-NEXT:    [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[MAC1_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_025]], 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]]
; CHECK-NEXT:    store i16 42, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]]
; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]]
; CHECK-NEXT:    [[ADD11]] = add i32 [[MUL9]], [[ADD10]]
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
  %cmp24 = icmp sgt i32 %arg, 0
  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  %.pre = load i16, ptr %arg3, align 2
  %.pre27 = load i16, ptr %arg2, align 2
  br label %for.body

for.cond.cleanup:
  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
  ret i32 %mac1.0.lcssa

for.body:
  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
  %0 = load i16, ptr %arrayidx, align 2
  %add = add nuw nsw i32 %i.025, 1
  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
  store i16 42, ptr %arrayidx1, align 2
  %1 = load i16, ptr %arrayidx1, align 2
  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
  %2 = load i16, ptr %arrayidx3, align 2
  %conv = sext i16 %2 to i32
  %conv4 = sext i16 %0 to i32
  %mul = mul nsw i32 %conv, %conv4
  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  %3 = load i16, ptr %arrayidx6, align 2
  %conv7 = sext i16 %3 to i32
  %conv8 = sext i16 %1 to i32
  %mul9 = mul nsw i32 %conv7, %conv8
  %add10 = add i32 %mul, %mac1.026
  %add11 = add i32 %mul9, %add10
  %exitcond = icmp ne i32 %add, %arg
  br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

; Negative test: %arg2 and %arg3 are both noalias, but the loop body stores 42
; to arg3[i] after loading it and before loading arg3[i+1]. The expected output
; below is the input loop unchanged, i.e. no parallel-DSP rewrite takes place.
; %arg1 and the two preheader loads are not used by the computation.
; NOTE(review): presumably the rewrite is rejected because the store sits
; between the two %arg3 loads that feed the multiplies -- confirm against the
; pass implementation.
define dso_local i32 @store_alias_arg3_illegal_2(i32 %arg, ptr nocapture %arg1, ptr noalias nocapture readonly %arg2, ptr noalias nocapture %arg3) {
; CHECK-LABEL: @store_alias_arg3_illegal_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2
; CHECK-NEXT:    [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[MAC1_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_025]], 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]]
; CHECK-NEXT:    store i16 42, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]]
; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]]
; CHECK-NEXT:    [[ADD11]] = add i32 [[MUL9]], [[ADD10]]
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
  ; Skip the loop entirely when the trip count %arg is not positive.
  %cmp24 = icmp sgt i32 %arg, 0
  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  ; These two loads have no users in this function.
  %.pre = load i16, ptr %arg3, align 2
  %.pre27 = load i16, ptr %arg2, align 2
  br label %for.body

for.cond.cleanup:
  ; Result is 0 when the loop was skipped, otherwise the final accumulator.
  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
  ret i32 %mac1.0.lcssa

for.body:
  ; %mac1.026 is the running sum; %i.025 is the induction variable i.
  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
  %0 = load i16, ptr %arrayidx, align 2
  %add = add nuw nsw i32 %i.025, 1
  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
  ; The store under test: writes arg3[i] after it was loaded into %0 and
  ; before arg3[i+1] is loaded into %1.
  store i16 42, ptr %arrayidx, align 2
  %1 = load i16, ptr %arrayidx1, align 2
  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
  %2 = load i16, ptr %arrayidx3, align 2
  ; First product: arg2[i] * arg3[i], both sign-extended to i32.
  %conv = sext i16 %2 to i32
  %conv4 = sext i16 %0 to i32
  %mul = mul nsw i32 %conv, %conv4
  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  %3 = load i16, ptr %arrayidx6, align 2
  ; Second product: arg2[i+1] * arg3[i+1].
  %conv7 = sext i16 %3 to i32
  %conv8 = sext i16 %1 to i32
  %mul9 = mul nsw i32 %conv7, %conv8
  ; Accumulate both products into the running sum.
  %add10 = add i32 %mul, %mac1.026
  %add11 = add i32 %mul9, %add10
  ; Loop while i+1 != %arg; note the induction variable advances by 1.
  %exitcond = icmp ne i32 %add, %arg
  br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

; Negative test: no pointer argument is noalias, and the loop body stores 42 to
; arg2[i+1] immediately before loading that same element into %3. The expected
; output below is the input loop unchanged, i.e. no parallel-DSP rewrite takes
; place. %arg1 and the two preheader loads are not used by the computation.
; NOTE(review): %arg2 is declared readonly although the body stores through a
; pointer derived from it -- confirm this is intentional for the test.
define dso_local i32 @store_alias_arg2_illegal_1(i32 %arg, ptr nocapture %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LABEL: @store_alias_arg2_illegal_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2
; CHECK-NEXT:    [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[MAC1_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_025]], 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]]
; CHECK-NEXT:    store i16 42, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]]
; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]]
; CHECK-NEXT:    [[ADD11]] = add i32 [[MUL9]], [[ADD10]]
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
  ; Skip the loop entirely when the trip count %arg is not positive.
  %cmp24 = icmp sgt i32 %arg, 0
  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  ; These two loads have no users in this function.
  %.pre = load i16, ptr %arg3, align 2
  %.pre27 = load i16, ptr %arg2, align 2
  br label %for.body

for.cond.cleanup:
  ; Result is 0 when the loop was skipped, otherwise the final accumulator.
  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
  ret i32 %mac1.0.lcssa

for.body:
  ; %mac1.026 is the running sum; %i.025 is the induction variable i.
  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
  %0 = load i16, ptr %arrayidx, align 2
  %add = add nuw nsw i32 %i.025, 1
  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
  %1 = load i16, ptr %arrayidx1, align 2
  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
  %2 = load i16, ptr %arrayidx3, align 2
  ; First product: arg2[i] * arg3[i], both sign-extended to i32.
  %conv = sext i16 %2 to i32
  %conv4 = sext i16 %0 to i32
  %mul = mul nsw i32 %conv, %conv4
  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  ; The store under test: overwrites arg2[i+1] right before it is loaded as
  ; the second multiply's operand.
  store i16 42, ptr %arrayidx6, align 2
  %3 = load i16, ptr %arrayidx6, align 2
  ; Second product: arg2[i+1] * arg3[i+1].
  %conv7 = sext i16 %3 to i32
  %conv8 = sext i16 %1 to i32
  %mul9 = mul nsw i32 %conv7, %conv8
  ; Accumulate both products into the running sum.
  %add10 = add i32 %mul, %mac1.026
  %add11 = add i32 %mul9, %add10
  ; Loop while i+1 != %arg; note the induction variable advances by 1.
  %exitcond = icmp ne i32 %add, %arg
  br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

; Negative test: no pointer argument is noalias, and the loop body stores 42 to
; arg2[i] after that element was loaded into %2 and before arg2[i+1] is loaded
; into %3. The expected output below is the input loop unchanged, i.e. no
; parallel-DSP rewrite takes place. %arg1 and the two preheader loads are not
; used by the computation.
; NOTE(review): %arg2 is declared readonly although the body stores through a
; pointer derived from it -- confirm this is intentional for the test.
define dso_local i32 @store_alias_arg2_illegal_2(i32 %arg, ptr nocapture %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
; CHECK-LABEL: @store_alias_arg2_illegal_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[ARG3:%.*]], align 2
; CHECK-NEXT:    [[DOTPRE27:%.*]] = load i16, ptr [[ARG2:%.*]], align 2
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[MAC1_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD11:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[MAC1_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[MAC1_026:%.*]] = phi i32 [ [[ADD11]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_025]], 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i32 [[ADD]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[I_025]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV4]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARG2]], i32 [[ADD]]
; CHECK-NEXT:    store i16 42, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[CONV7]], [[CONV8]]
; CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[MUL]], [[MAC1_026]]
; CHECK-NEXT:    [[ADD11]] = add i32 [[MUL9]], [[ADD10]]
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[ADD]], [[ARG]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
;
entry:
  ; Skip the loop entirely when the trip count %arg is not positive.
  %cmp24 = icmp sgt i32 %arg, 0
  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  ; These two loads have no users in this function.
  %.pre = load i16, ptr %arg3, align 2
  %.pre27 = load i16, ptr %arg2, align 2
  br label %for.body

for.cond.cleanup:
  ; Result is 0 when the loop was skipped, otherwise the final accumulator.
  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
  ret i32 %mac1.0.lcssa

for.body:
  ; %mac1.026 is the running sum; %i.025 is the induction variable i.
  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
  %0 = load i16, ptr %arrayidx, align 2
  %add = add nuw nsw i32 %i.025, 1
  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
  %1 = load i16, ptr %arrayidx1, align 2
  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
  %2 = load i16, ptr %arrayidx3, align 2
  ; First product: arg2[i] * arg3[i], both sign-extended to i32.
  %conv = sext i16 %2 to i32
  %conv4 = sext i16 %0 to i32
  %mul = mul nsw i32 %conv, %conv4
  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  ; The store under test: writes arg2[i] (already loaded as %2) before the
  ; neighbouring element arg2[i+1] is loaded as %3.
  store i16 42, ptr %arrayidx3, align 2
  %3 = load i16, ptr %arrayidx6, align 2
  ; Second product: arg2[i+1] * arg3[i+1].
  %conv7 = sext i16 %3 to i32
  %conv8 = sext i16 %1 to i32
  %mul9 = mul nsw i32 %conv7, %conv8
  ; Accumulate both products into the running sum.
  %add10 = add i32 %mul, %mac1.026
  %add11 = add i32 %mul9, %add10
  ; Loop while i+1 != %arg; note the induction variable advances by 1.
  %exitcond = icmp ne i32 %add, %arg
  br i1 %exitcond, label %for.body, label %for.cond.cleanup
}

; Four-way unrolled multiply-accumulate over the noalias arrays %b and %c,
; using element indices i, i|1, i|2 and i|3 with i advancing by 4 from 0 while
; i+4 < 100. A store of 43 to c[i|1] is placed between the load of c[i] and the
; reload of c[i|1]. The expected output below keeps all four scalar multiplies,
; i.e. nothing is rewritten (the only textual difference is that the store's
; alignment is printed explicitly).
; TODO: I think we should be able to generate one smlad here. The search fails
; when it finds the alias.
define i32 @one_pair_alias(ptr noalias nocapture readonly %b, ptr noalias nocapture %c) {
; CHECK-LABEL: @one_pair_alias(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret i32 [[ADD26:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_050:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD27:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[A_049:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD26]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ADD3:%.*]] = or i32 [[I_050]], 1
; CHECK-NEXT:    [[ADD11:%.*]] = or i32 [[I_050]], 2
; CHECK-NEXT:    [[ADD19:%.*]] = or i32 [[I_050]], 3
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 [[I_050]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[ADD3]]
; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[ADD11]]
; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[ADD19]]
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[C:%.*]], i32 [[I_050]]
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[C]], i32 [[ADD3]]
; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i16, ptr [[C]], i32 [[ADD11]]
; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[C]], i32 [[ADD19]]
; CHECK-NEXT:    [[TMP:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2
; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX12]], align 2
; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX20]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
; CHECK-NEXT:    store i16 43, ptr [[ARRAYIDX7]], align 2
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2
; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX23]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP]] to i32
; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]]
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[MUL]], [[A_049]]
; CHECK-NEXT:    [[CONV5:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[CONV8]], [[CONV5]]
; CHECK-NEXT:    [[ADD10:%.*]] = add nsw i32 [[ADD]], [[MUL9]]
; CHECK-NEXT:    [[CONV13:%.*]] = sext i16 [[TMP4]] to i32
; CHECK-NEXT:    [[CONV16:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[MUL17:%.*]] = mul nsw i32 [[CONV16]], [[CONV13]]
; CHECK-NEXT:    [[ADD18:%.*]] = add nsw i32 [[ADD10]], [[MUL17]]
; CHECK-NEXT:    [[CONV21:%.*]] = sext i16 [[TMP6]] to i32
; CHECK-NEXT:    [[CONV24:%.*]] = sext i16 [[TMP7]] to i32
; CHECK-NEXT:    [[MUL25:%.*]] = mul nsw i32 [[CONV24]], [[CONV21]]
; CHECK-NEXT:    [[ADD26]] = add nsw i32 [[ADD18]], [[MUL25]]
; CHECK-NEXT:    [[ADD27]] = add nuw nsw i32 [[I_050]], 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[ADD27]], 100
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
;
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ; Returns the accumulator value from the final iteration.
  ret i32 %add26

for.body:                                         ; preds = %for.body, %entry
  ; %i.050 is the induction variable i; %a.049 is the running sum.
  %i.050 = phi i32 [ 0, %entry ], [ %add27, %for.body ]
  %a.049 = phi i32 [ 0, %entry ], [ %add26, %for.body ]
  ; i starts at 0 and steps by 4, so its low two bits are clear and these
  ; `or`s yield i+1, i+2 and i+3.
  %add3 = or i32 %i.050, 1
  %add11 = or i32 %i.050, 2
  %add19 = or i32 %i.050, 3
  ; Addresses of b[i..i+3].
  %arrayidx = getelementptr inbounds i16, ptr %b, i32 %i.050
  %arrayidx4 = getelementptr inbounds i16, ptr %b, i32 %add3
  %arrayidx12 = getelementptr inbounds i16, ptr %b, i32 %add11
  %arrayidx20 = getelementptr inbounds i16, ptr %b, i32 %add19
  ; Addresses of c[i..i+3].
  %arrayidx1 = getelementptr inbounds i16, ptr %c, i32 %i.050
  %arrayidx7 = getelementptr inbounds i16, ptr %c, i32 %add3
  %arrayidx15 = getelementptr inbounds i16, ptr %c, i32 %add11
  %arrayidx23 = getelementptr inbounds i16, ptr %c, i32 %add19
  %tmp = load i16, ptr %arrayidx, align 2
  %tmp2 = load i16, ptr %arrayidx4, align 2
  %tmp4 = load i16, ptr %arrayidx12, align 2
  %tmp6 = load i16, ptr %arrayidx20, align 2
  %tmp1 = load i16, ptr %arrayidx1, align 2
  ; The store under test: overwrites c[i+1] after c[i] was loaded and right
  ; before c[i+1] itself is loaded. No explicit alignment is given here.
  store i16 43, ptr %arrayidx7
  %tmp3 = load i16, ptr %arrayidx7, align 2
  %tmp5 = load i16, ptr %arrayidx15, align 2
  %tmp7 = load i16, ptr %arrayidx23, align 2
  ; sum += c[i] * b[i]
  %conv = sext i16 %tmp to i32
  %conv2 = sext i16 %tmp1 to i32
  %mul = mul nsw i32 %conv2, %conv
  %add = add nsw i32 %mul, %a.049
  ; sum += c[i+1] * b[i+1]
  %conv5 = sext i16 %tmp2 to i32
  %conv8 = sext i16 %tmp3 to i32
  %mul9 = mul nsw i32 %conv8, %conv5
  %add10 = add nsw i32 %add, %mul9
  ; sum += c[i+2] * b[i+2]
  %conv13 = sext i16 %tmp4 to i32
  %conv16 = sext i16 %tmp5 to i32
  %mul17 = mul nsw i32 %conv16, %conv13
  %add18 = add nsw i32 %add10, %mul17
  ; sum += c[i+3] * b[i+3]
  %conv21 = sext i16 %tmp6 to i32
  %conv24 = sext i16 %tmp7 to i32
  %mul25 = mul nsw i32 %conv24, %conv21
  %add26 = add nsw i32 %add18, %mul25
  ; Advance by four elements and continue while the new index is below 100.
  %add27 = add nuw nsw i32 %i.050, 4
  %cmp = icmp ult i32 %add27, 100
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}