llvm/llvm/test/CodeGen/VE/Scalar/store_stk.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve | FileCheck %s

;;; Test store instructions
;;;
;;; Note:
;;;   We test store instructions using general stack, stack with dynamic
;;;   allocation, stack with dynamic allocation and alignment, and stack
;;;   with dynamic allocation, alignment, and spill.
;;;
;;; First test using a stack for a leaf function.
;;;
;;;   |                                              | Higher address
;;;   |----------------------------------------------| <- old sp
;;;   | Local variables of fixed size                |
;;;   |----------------------------------------------| <- sp
;;;   |                                              | Lower address
;;;
;;; Access local variable using sp (%s11).  In addition, please remember
;;; that stack is aligned by 16 bytes.
;;;
;;; Second test using a stack with dynamic allocation.
;;;
;;;   |                                              | Higher address
;;;   |----------------------------------------------|
;;;   | Parameter area for this function             |
;;;   |----------------------------------------------|
;;;   | Register save area (RSA) for this function   |
;;;   |----------------------------------------------|
;;;   | Return address for this function             |
;;;   |----------------------------------------------|
;;;   | Frame pointer for this function              |
;;;   |----------------------------------------------| <- fp(=old sp)
;;;   | Local variables of fixed size                |
;;;   |----------------------------------------------|
;;;   |.variable-sized.local.variables.(VLAs)........|
;;;   |..............................................|
;;;   |..............................................|
;;;   |----------------------------------------------| <- returned by alloca
;;;   | Parameter area for callee                    |
;;;   |----------------------------------------------|
;;;   | Register save area (RSA) for callee          |
;;;   |----------------------------------------------|
;;;   | Return address for callee                    |
;;;   |----------------------------------------------|
;;;   | Frame pointer for callee                     |
;;;   |----------------------------------------------| <- sp
;;;   |                                              | Lower address
;;;
;;; Access local variable using fp (%s9) since the size of VLA is not
;;; known.  At the beginning of the functions, allocates 240 + data
;;; bytes.  240 means RSA+RA+FP (=176) + Parameter (=64).
;;;
;;; Third test using a stack with dynamic allocation and alignment.
;;;
;;;   |                                              | Higher address
;;;   |----------------------------------------------|
;;;   | Parameter area for this function             |
;;;   |----------------------------------------------|
;;;   | Register save area (RSA) for this function   |
;;;   |----------------------------------------------|
;;;   | Return address for this function             |
;;;   |----------------------------------------------|
;;;   | Frame pointer for this function              |
;;;   |----------------------------------------------| <- fp(=old sp)
;;;   |.empty.space.to.make.part.below.aligned.in....|
;;;   |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
;;;   |.alignment....................................|  unknown at compile time)
;;;   |----------------------------------------------|
;;;   | Local variables of fixed size including spill|
;;;   | slots                                        |
;;;   |----------------------------------------------| <- bp(not defined by ABI,
;;;   |.variable-sized.local.variables.(VLAs)........|       LLVM chooses SX17)
;;;   |..............................................| (size of this area is
;;;   |..............................................|  unknown at compile time)
;;;   |----------------------------------------------| <- stack top (returned by
;;;   | Parameter area for callee                    |               alloca)
;;;   |----------------------------------------------|
;;;   | Register save area (RSA) for callee          |
;;;   |----------------------------------------------|
;;;   | Return address for callee                    |
;;;   |----------------------------------------------|
;;;   | Frame pointer for callee                     |
;;;   |----------------------------------------------| <- sp
;;;   |                                              | Lower address
;;;
;;; Access local variable using bp (%s17) since the size of alignment
;;; and VLA are not known.  At the beginning of the functions, allocates
;;; pad(240 + data + align) bytes.  Then, access data through bp + pad(240)
;;; since this address doesn't change even if VLA is dynamically allocated.
;;;
;;; Fourth test using a general stack with some spills.
;;;

;;; Store an i64 argument into a fixed-size local of a leaf function.
;;; The checks expect a 16-byte frame with the local addressed directly
;;; from sp (%s11) at offset 8.
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storei64_stk(i64 noundef %0) {
; CHECK-LABEL: storei64_stk:
; CHECK:       # %bb.0:
; CHECK-NEXT:    adds.l %s11, -16, %s11
; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB0_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    st %s0, 8(, %s11)
; CHECK-NEXT:    adds.l %s11, 16, %s11
; CHECK-NEXT:    b.l.t (, %s10)
  ; %2 is the 8-byte fixed-size local that receives the volatile store.
  %2 = alloca i64, align 8
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
  store volatile i64 %0, ptr %2, align 8, !tbaa !3
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
  ret void
}

;;; Lifetime marker intrinsics.  The test functions in this file call them
;;; around their fixed-size locals and big arrays.
; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)

; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)

;;; Store an i64 argument into a fixed-size local that shares the frame with
;;; a [268435455 x i64] array (2147483640 bytes).  The checks expect a
;;; 2147483648-byte frame allocated with a single lea, the scalar local at
;;; 2147483640(%s11), and the array filled by a loop starting at sp.
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storei64_stk_big(i64 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storei64_stk_big:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s11, -2147483648(, %s11)
; CHECK-NEXT:    brge.l %s11, %s8, .LBB1_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB1_4:
; CHECK-NEXT:    st %s0, 2147483640(, %s11)
; CHECK-NEXT:    or %s0, 0, (0)1
; CHECK-NEXT:    lea %s2, 2147483640
; CHECK-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st %s1, (%s0, %s11)
; CHECK-NEXT:    lea %s0, 8(, %s0)
; CHECK-NEXT:    brne.l %s0, %s2, .LBB1_1
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    lea %s13, -2147483648
; CHECK-NEXT:    and %s13, %s13, (32)0
; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the scalar local under test; %4 is the big array that inflates
  ; the frame.
  %3 = alloca i64, align 8
  %4 = alloca [268435455 x i64], align 8
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
  call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %4)
  store volatile i64 %0, ptr %3, align 8, !tbaa !3
  br label %6

5:                                                ; preds = %6
  call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %4)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
  ret void

6:                                                ; preds = %2, %6
  ; Loop over the array, storing %1 to each of the 268435455 elements.
  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
  %8 = getelementptr inbounds [268435455 x i64], ptr %4, i64 0, i64 %7
  store volatile i64 %1, ptr %8, align 8, !tbaa !3
  %9 = add nuw nsw i64 %7, 1
  %10 = icmp eq i64 %9, 268435455
  br i1 %10, label %5, label %6, !llvm.loop !7
}

;;; Same as storei64_stk_big, but the array has 268435456 elements
;;; (2147483648 bytes).  The checks expect sp to be adjusted through %s13
;;; with a lea/and/lea.sl sequence instead of a single lea, the address of
;;; the scalar local to be formed in %s13 the same way, and the array loop
;;; to store at 8(%s0, %s11).
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storei64_stk_big2(i64 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storei64_stk_big2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s13, 2147483632
; CHECK-NEXT:    and %s13, %s13, (32)0
; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
; CHECK-NEXT:    brge.l %s11, %s8, .LBB2_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB2_4:
; CHECK-NEXT:    lea %s13, -2147483640
; CHECK-NEXT:    and %s13, %s13, (32)0
; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
; CHECK-NEXT:    st %s0, (, %s13)
; CHECK-NEXT:    or %s0, 0, (0)1
; CHECK-NEXT:    lea %s2, -2147483648
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st %s1, 8(%s0, %s11)
; CHECK-NEXT:    lea %s0, 8(, %s0)
; CHECK-NEXT:    brne.l %s0, %s2, .LBB2_1
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    lea %s13, -2147483632
; CHECK-NEXT:    and %s13, %s13, (32)0
; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the scalar local under test; %4 is the big array that inflates
  ; the frame.
  %3 = alloca i64, align 8
  %4 = alloca [268435456 x i64], align 8
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %4)
  store volatile i64 %0, ptr %3, align 8, !tbaa !3
  br label %6

5:                                                ; preds = %6
  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %4)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
  ret void

6:                                                ; preds = %2, %6
  ; Loop over the array, storing %1 to each of the 268435456 elements.
  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
  %8 = getelementptr inbounds [268435456 x i64], ptr %4, i64 0, i64 %7
  store volatile i64 %1, ptr %8, align 8, !tbaa !3
  %9 = add nuw nsw i64 %7, 1
  %10 = icmp eq i64 %9, 268435456
  br i1 %10, label %5, label %6, !llvm.loop !9
}

;;; Store an i64 argument to both a dynamically sized alloca and a
;;; fixed-size local.  The checks expect the dynamic area to be obtained
;;; through a call to __ve_grow_stack and addressed at 240(%s11), while the
;;; fixed-size local is addressed from fp (%s9) at offset -8.
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storei64_stk_dyn(i64 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storei64_stk_dyn:
; CHECK:       # %bb.0:
; CHECK-NEXT:    st %s9, (, %s11)
; CHECK-NEXT:    st %s10, 8(, %s11)
; CHECK-NEXT:    or %s9, 0, %s11
; CHECK-NEXT:    lea %s11, -256(, %s11)
; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB3_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB3_2:
; CHECK-NEXT:    or %s2, 0, %s0
; CHECK-NEXT:    lea %s0, 15(, %s1)
; CHECK-NEXT:    and %s0, -16, %s0
; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    lea %s0, 240(, %s11)
; CHECK-NEXT:    st %s2, (, %s0)
; CHECK-NEXT:    st %s2, -8(, %s9)
; CHECK-NEXT:    or %s11, 0, %s9
; CHECK-NEXT:    ld %s10, 8(, %s11)
; CHECK-NEXT:    ld %s9, (, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fixed-size local.
  %3 = alloca i64, align 8
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
  ; %4 is the dynamically sized allocation of %1 bytes.
  %4 = alloca i8, i64 %1, align 8
  store volatile i64 %0, ptr %4, align 8, !tbaa !3
  store volatile i64 %0, ptr %3, align 8, !tbaa !3
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
  ret void
}

;;; Like storei64_stk_dyn, but the fixed-size local is over-aligned to 32
;;; bytes.  The checks expect sp to be masked with (59)1, bp (%s17) to be
;;; saved and set to the aligned sp, and the fixed-size local to be
;;; addressed from bp at offset 256.
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storei64_stk_dyn_align(i64 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storei64_stk_dyn_align:
; CHECK:       # %bb.0:
; CHECK-NEXT:    st %s9, (, %s11)
; CHECK-NEXT:    st %s10, 8(, %s11)
; CHECK-NEXT:    st %s17, 40(, %s11)
; CHECK-NEXT:    or %s9, 0, %s11
; CHECK-NEXT:    lea %s11, -288(, %s11)
; CHECK-NEXT:    and %s11, %s11, (59)1
; CHECK-NEXT:    or %s17, 0, %s11
; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB4_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB4_2:
; CHECK-NEXT:    or %s2, 0, %s0
; CHECK-NEXT:    lea %s0, 15(, %s1)
; CHECK-NEXT:    and %s0, -16, %s0
; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    lea %s0, 240(, %s11)
; CHECK-NEXT:    st %s2, (, %s0)
; CHECK-NEXT:    st %s2, 256(, %s17)
; CHECK-NEXT:    or %s11, 0, %s9
; CHECK-NEXT:    ld %s17, 40(, %s11)
; CHECK-NEXT:    ld %s10, 8(, %s11)
; CHECK-NEXT:    ld %s9, (, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fixed-size local with 32-byte alignment.
  %3 = alloca i64, align 32
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
  ; %4 is the dynamically sized allocation of %1 bytes.
  %4 = alloca i8, i64 %1, align 8
  store volatile i64 %0, ptr %4, align 8, !tbaa !3
  store volatile i64 %0, ptr %3, align 32, !tbaa !10
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
  ret void
}

;;; Like storei64_stk_dyn_align, but with two over-aligned fixed-size
;;; locals (32-byte and 64-byte alignment).  The checks expect sp to be
;;; masked with (58)1 and the two locals to be addressed from bp (%s17) at
;;; offsets 288 and 256.
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storei64_stk_dyn_align2(i64 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storei64_stk_dyn_align2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    st %s9, (, %s11)
; CHECK-NEXT:    st %s10, 8(, %s11)
; CHECK-NEXT:    st %s17, 40(, %s11)
; CHECK-NEXT:    or %s9, 0, %s11
; CHECK-NEXT:    lea %s11, -320(, %s11)
; CHECK-NEXT:    and %s11, %s11, (58)1
; CHECK-NEXT:    or %s17, 0, %s11
; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB5_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB5_2:
; CHECK-NEXT:    or %s2, 0, %s0
; CHECK-NEXT:    lea %s0, 15(, %s1)
; CHECK-NEXT:    and %s0, -16, %s0
; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    lea %s0, 240(, %s11)
; CHECK-NEXT:    st %s2, (, %s0)
; CHECK-NEXT:    st %s2, 288(, %s17)
; CHECK-NEXT:    st %s2, 256(, %s17)
; CHECK-NEXT:    or %s11, 0, %s9
; CHECK-NEXT:    ld %s17, 40(, %s11)
; CHECK-NEXT:    ld %s10, 8(, %s11)
; CHECK-NEXT:    ld %s9, (, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 and %4 are the fixed-size locals with 32- and 64-byte alignment.
  %3 = alloca i64, align 32
  %4 = alloca i64, align 64
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
  ; %5 is the dynamically sized allocation of %1 bytes.
  %5 = alloca i8, i64 %1, align 8
  store volatile i64 %0, ptr %5, align 8, !tbaa !3
  store volatile i64 %0, ptr %3, align 32, !tbaa !10
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %4)
  store volatile i64 %0, ptr %4, align 64, !tbaa !10
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %4)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
  ret void
}

;;; Like storei64_stk_dyn_align, but with calls to @dummy and @pass placed
;;; between the dynamic allocation and the stores.  The checks expect
;;; %s18, %s19 and %s20 (holding %1, %0 and the address of the dynamic
;;; area) to be spilled to and reloaded from 48/56/64(%s9) around the body.
; Function Attrs: nounwind
define x86_fastcallcc void @storei64_stk_dyn_align_spill(i64 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storei64_stk_dyn_align_spill:
; CHECK:       # %bb.0:
; CHECK-NEXT:    st %s9, (, %s11)
; CHECK-NEXT:    st %s10, 8(, %s11)
; CHECK-NEXT:    st %s17, 40(, %s11)
; CHECK-NEXT:    or %s9, 0, %s11
; CHECK-NEXT:    lea %s11, -288(, %s11)
; CHECK-NEXT:    and %s11, %s11, (59)1
; CHECK-NEXT:    or %s17, 0, %s11
; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB6_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB6_2:
; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
; CHECK-NEXT:    st %s19, 56(, %s9) # 8-byte Folded Spill
; CHECK-NEXT:    st %s20, 64(, %s9) # 8-byte Folded Spill
; CHECK-NEXT:    or %s18, 0, %s1
; CHECK-NEXT:    or %s19, 0, %s0
; CHECK-NEXT:    lea %s0, 15(, %s1)
; CHECK-NEXT:    and %s0, -16, %s0
; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    lea %s20, 240(, %s11)
; CHECK-NEXT:    lea %s0, dummy@lo
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    lea %s0, pass@lo
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
; CHECK-NEXT:    or %s0, 0, %s18
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    st %s19, (, %s20)
; CHECK-NEXT:    st %s19, 256(, %s17)
; CHECK-NEXT:    ld %s20, 64(, %s9) # 8-byte Folded Reload
; CHECK-NEXT:    ld %s19, 56(, %s9) # 8-byte Folded Reload
; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
; CHECK-NEXT:    or %s11, 0, %s9
; CHECK-NEXT:    ld %s17, 40(, %s11)
; CHECK-NEXT:    ld %s10, 8(, %s11)
; CHECK-NEXT:    ld %s9, (, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fixed-size local with 32-byte alignment.
  %3 = alloca i64, align 32
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
  ; %4 is the dynamically sized allocation of %1 bytes.
  %4 = alloca i8, i64 %1, align 8
  ; The calls come before the stores, so %0, %1 and %4 are all still
  ; needed after a call.
  tail call void (...) @dummy()
  tail call void @pass(i64 noundef %1)
  store volatile i64 %0, ptr %4, align 8, !tbaa !3
  store volatile i64 %0, ptr %3, align 32, !tbaa !10
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
  ret void
}

;;; External functions called by the *_dyn_align_spill tests.
declare void @dummy(...)

declare void @pass(i64 noundef)

;;; Store an fp128 argument into a fixed-size local of a leaf function.
;;; The checks expect a 16-byte frame and two 8-byte stores from sp
;;; (%s11): %s1 at offset 0 and %s0 at offset 8.
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storequad_stk(fp128 noundef %0) {
; CHECK-LABEL: storequad_stk:
; CHECK:       # %bb.0:
; CHECK-NEXT:    adds.l %s11, -16, %s11
; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB7_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB7_2:
; CHECK-NEXT:    st %s1, (, %s11)
; CHECK-NEXT:    st %s0, 8(, %s11)
; CHECK-NEXT:    adds.l %s11, 16, %s11
; CHECK-NEXT:    b.l.t (, %s10)
  ; %2 is the 16-byte fixed-size local that receives the volatile store.
  %2 = alloca fp128, align 16
  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
  store volatile fp128 %0, ptr %2, align 16, !tbaa !12
  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
  ret void
}

;;; Store an fp128 argument into a fixed-size local that shares the frame
;;; with a [268435455 x i64] array (2147483640 bytes).  The checks expect
;;; sp to be adjusted through %s13 with a lea/and/lea.sl sequence, the
;;; address of the fp128 local to be formed in %s13 the same way, and the
;;; array loop to store at 8(%s0, %s11).
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storequad_stk_big(fp128 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storequad_stk_big:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s13, 2147483632
; CHECK-NEXT:    and %s13, %s13, (32)0
; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
; CHECK-NEXT:    brge.l %s11, %s8, .LBB8_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB8_4:
; CHECK-NEXT:    lea %s13, -2147483648
; CHECK-NEXT:    and %s13, %s13, (32)0
; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
; CHECK-NEXT:    st %s1, (, %s13)
; CHECK-NEXT:    st %s0, 8(, %s13)
; CHECK-NEXT:    or %s0, 0, (0)1
; CHECK-NEXT:    lea %s1, 2147483640
; CHECK-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st %s2, 8(%s0, %s11)
; CHECK-NEXT:    lea %s0, 8(, %s0)
; CHECK-NEXT:    brne.l %s0, %s1, .LBB8_1
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    lea %s13, -2147483632
; CHECK-NEXT:    and %s13, %s13, (32)0
; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fp128 local under test; %4 is the big array that inflates
  ; the frame.
  %3 = alloca fp128, align 16
  %4 = alloca [268435455 x i64], align 8
  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
  call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %4)
  store volatile fp128 %0, ptr %3, align 16, !tbaa !12
  br label %6

5:                                                ; preds = %6
  call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %4)
  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
  ret void

6:                                                ; preds = %2, %6
  ; Loop over the array, storing %1 to each of the 268435455 elements.
  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
  %8 = getelementptr inbounds [268435455 x i64], ptr %4, i64 0, i64 %7
  store volatile i64 %1, ptr %8, align 8, !tbaa !3
  %9 = add nuw nsw i64 %7, 1
  %10 = icmp eq i64 %9, 268435455
  br i1 %10, label %5, label %6, !llvm.loop !14
}

;;; Same as storequad_stk_big, but the array has 268435456 elements
;;; (2147483648 bytes).  The checks expect the same lea/and/lea.sl frame
;;; adjustment, with the array loop storing at (%s0, %s11) and the loop
;;; bound materialized with a lea/and pair.
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storequad_stk_big2(fp128 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storequad_stk_big2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s13, 2147483632
; CHECK-NEXT:    and %s13, %s13, (32)0
; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
; CHECK-NEXT:    brge.l %s11, %s8, .LBB9_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB9_4:
; CHECK-NEXT:    lea %s13, -2147483648
; CHECK-NEXT:    and %s13, %s13, (32)0
; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
; CHECK-NEXT:    st %s1, (, %s13)
; CHECK-NEXT:    st %s0, 8(, %s13)
; CHECK-NEXT:    or %s0, 0, (0)1
; CHECK-NEXT:    lea %s1, -2147483648
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st %s2, (%s0, %s11)
; CHECK-NEXT:    lea %s0, 8(, %s0)
; CHECK-NEXT:    brne.l %s0, %s1, .LBB9_1
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    lea %s13, -2147483632
; CHECK-NEXT:    and %s13, %s13, (32)0
; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fp128 local under test; %4 is the big array that inflates
  ; the frame.
  %3 = alloca fp128, align 16
  %4 = alloca [268435456 x i64], align 8
  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %4)
  store volatile fp128 %0, ptr %3, align 16, !tbaa !12
  br label %6

5:                                                ; preds = %6
  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %4)
  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
  ret void

6:                                                ; preds = %2, %6
  ; Loop over the array, storing %1 to each of the 268435456 elements.
  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
  %8 = getelementptr inbounds [268435456 x i64], ptr %4, i64 0, i64 %7
  store volatile i64 %1, ptr %8, align 8, !tbaa !3
  %9 = add nuw nsw i64 %7, 1
  %10 = icmp eq i64 %9, 268435456
  br i1 %10, label %5, label %6, !llvm.loop !15
}

;;; Store an fp128 argument to both a dynamically sized alloca and a
;;; fixed-size local.  The checks expect the dynamic area to be obtained
;;; through a call to __ve_grow_stack and addressed at 240(%s11), while the
;;; fixed-size local is addressed from fp (%s9) at offsets -16 and -8.
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storequad_stk_dyn(fp128 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storequad_stk_dyn:
; CHECK:       # %bb.0:
; CHECK-NEXT:    st %s9, (, %s11)
; CHECK-NEXT:    st %s10, 8(, %s11)
; CHECK-NEXT:    or %s9, 0, %s11
; CHECK-NEXT:    lea %s11, -256(, %s11)
; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB10_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB10_2:
; CHECK-NEXT:    or %s4, 0, %s0
; CHECK-NEXT:    or %s5, 0, %s1
; CHECK-NEXT:    lea %s0, 15(, %s2)
; CHECK-NEXT:    and %s0, -16, %s0
; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    lea %s0, 240(, %s11)
; CHECK-NEXT:    st %s4, 8(, %s0)
; CHECK-NEXT:    st %s5, (, %s0)
; CHECK-NEXT:    st %s5, -16(, %s9)
; CHECK-NEXT:    st %s4, -8(, %s9)
; CHECK-NEXT:    or %s11, 0, %s9
; CHECK-NEXT:    ld %s10, 8(, %s11)
; CHECK-NEXT:    ld %s9, (, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fixed-size local.
  %3 = alloca fp128, align 16
  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
  ; %4 is the dynamically sized allocation of %1 bytes.
  %4 = alloca i8, i64 %1, align 16
  store volatile fp128 %0, ptr %4, align 16, !tbaa !12
  store volatile fp128 %0, ptr %3, align 16, !tbaa !12
  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
  ret void
}

;;; Like storequad_stk_dyn, but the fixed-size local is over-aligned to 32
;;; bytes.  The checks expect sp to be masked with (59)1, bp (%s17) to be
;;; saved and set to the aligned sp, and the fixed-size local to be
;;; addressed from bp at offsets 256 and 264.
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storequad_stk_dyn_align(fp128 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storequad_stk_dyn_align:
; CHECK:       # %bb.0:
; CHECK-NEXT:    st %s9, (, %s11)
; CHECK-NEXT:    st %s10, 8(, %s11)
; CHECK-NEXT:    st %s17, 40(, %s11)
; CHECK-NEXT:    or %s9, 0, %s11
; CHECK-NEXT:    lea %s11, -288(, %s11)
; CHECK-NEXT:    and %s11, %s11, (59)1
; CHECK-NEXT:    or %s17, 0, %s11
; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB11_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB11_2:
; CHECK-NEXT:    or %s4, 0, %s0
; CHECK-NEXT:    or %s5, 0, %s1
; CHECK-NEXT:    lea %s0, 15(, %s2)
; CHECK-NEXT:    and %s0, -16, %s0
; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    lea %s0, 240(, %s11)
; CHECK-NEXT:    st %s4, 8(, %s0)
; CHECK-NEXT:    st %s5, (, %s0)
; CHECK-NEXT:    st %s5, 256(, %s17)
; CHECK-NEXT:    st %s4, 264(, %s17)
; CHECK-NEXT:    or %s11, 0, %s9
; CHECK-NEXT:    ld %s17, 40(, %s11)
; CHECK-NEXT:    ld %s10, 8(, %s11)
; CHECK-NEXT:    ld %s9, (, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fixed-size local with 32-byte alignment.
  %3 = alloca fp128, align 32
  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
  ; %4 is the dynamically sized allocation of %1 bytes.
  %4 = alloca i8, i64 %1, align 16
  store volatile fp128 %0, ptr %4, align 16, !tbaa !12
  store volatile fp128 %0, ptr %3, align 32, !tbaa !16
  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
  ret void
}

;;; Like storequad_stk_dyn_align, but with two over-aligned fixed-size
;;; locals (32-byte and 64-byte alignment).  The checks expect sp to be
;;; masked with (58)1 and the two locals to be addressed from bp (%s17) at
;;; offsets 288/296 and 256/264.
; Function Attrs: argmemonly nofree nounwind
define x86_fastcallcc void @storequad_stk_dyn_align2(fp128 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storequad_stk_dyn_align2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    st %s9, (, %s11)
; CHECK-NEXT:    st %s10, 8(, %s11)
; CHECK-NEXT:    st %s17, 40(, %s11)
; CHECK-NEXT:    or %s9, 0, %s11
; CHECK-NEXT:    lea %s11, -320(, %s11)
; CHECK-NEXT:    and %s11, %s11, (58)1
; CHECK-NEXT:    or %s17, 0, %s11
; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB12_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB12_2:
; CHECK-NEXT:    or %s4, 0, %s0
; CHECK-NEXT:    or %s5, 0, %s1
; CHECK-NEXT:    lea %s0, 15(, %s2)
; CHECK-NEXT:    and %s0, -16, %s0
; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    lea %s0, 240(, %s11)
; CHECK-NEXT:    st %s4, 8(, %s0)
; CHECK-NEXT:    st %s5, (, %s0)
; CHECK-NEXT:    st %s5, 288(, %s17)
; CHECK-NEXT:    st %s4, 296(, %s17)
; CHECK-NEXT:    st %s5, 256(, %s17)
; CHECK-NEXT:    st %s4, 264(, %s17)
; CHECK-NEXT:    or %s11, 0, %s9
; CHECK-NEXT:    ld %s17, 40(, %s11)
; CHECK-NEXT:    ld %s10, 8(, %s11)
; CHECK-NEXT:    ld %s9, (, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 and %4 are the fixed-size locals with 32- and 64-byte alignment.
  %3 = alloca fp128, align 32
  %4 = alloca fp128, align 64
  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
  ; %5 is the dynamically sized allocation of %1 bytes.
  %5 = alloca i8, i64 %1, align 16
  store volatile fp128 %0, ptr %5, align 16, !tbaa !12
  store volatile fp128 %0, ptr %3, align 32, !tbaa !16
  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %4)
  store volatile fp128 %0, ptr %4, align 64, !tbaa !16
  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %4)
  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
  ret void
}

;;; Like storequad_stk_dyn_align, but with calls to @dummy and @pass placed
;;; between the dynamic allocation and the stores.  The checks expect
;;; %s18-%s21 (holding %1, the address of the dynamic area, and the two
;;; halves of %0) to be spilled to and reloaded from 48/56/64/72(%s9)
;;; around the body.
; Function Attrs: nounwind
define x86_fastcallcc void @storequad_stk_dyn_align_spill(fp128 noundef %0, i64 noundef %1) {
; CHECK-LABEL: storequad_stk_dyn_align_spill:
; CHECK:       # %bb.0:
; CHECK-NEXT:    st %s9, (, %s11)
; CHECK-NEXT:    st %s10, 8(, %s11)
; CHECK-NEXT:    st %s17, 40(, %s11)
; CHECK-NEXT:    or %s9, 0, %s11
; CHECK-NEXT:    lea %s11, -288(, %s11)
; CHECK-NEXT:    and %s11, %s11, (59)1
; CHECK-NEXT:    or %s17, 0, %s11
; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB13_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld %s61, 24(, %s14)
; CHECK-NEXT:    or %s62, 0, %s0
; CHECK-NEXT:    lea %s63, 315
; CHECK-NEXT:    shm.l %s63, (%s61)
; CHECK-NEXT:    shm.l %s8, 8(%s61)
; CHECK-NEXT:    shm.l %s11, 16(%s61)
; CHECK-NEXT:    monc
; CHECK-NEXT:    or %s0, 0, %s62
; CHECK-NEXT:  .LBB13_2:
; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
; CHECK-NEXT:    st %s19, 56(, %s9) # 8-byte Folded Spill
; CHECK-NEXT:    st %s20, 64(, %s9) # 8-byte Folded Spill
; CHECK-NEXT:    st %s21, 72(, %s9) # 8-byte Folded Spill
; CHECK-NEXT:    or %s18, 0, %s2
; CHECK-NEXT:    or %s20, 0, %s0
; CHECK-NEXT:    or %s21, 0, %s1
; CHECK-NEXT:    lea %s0, 15(, %s2)
; CHECK-NEXT:    and %s0, -16, %s0
; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    lea %s19, 240(, %s11)
; CHECK-NEXT:    lea %s0, dummy@lo
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    lea %s0, pass@lo
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
; CHECK-NEXT:    or %s0, 0, %s18
; CHECK-NEXT:    bsic %s10, (, %s12)
; CHECK-NEXT:    st %s20, 8(, %s19)
; CHECK-NEXT:    st %s21, (, %s19)
; CHECK-NEXT:    st %s21, 256(, %s17)
; CHECK-NEXT:    st %s20, 264(, %s17)
; CHECK-NEXT:    ld %s21, 72(, %s9) # 8-byte Folded Reload
; CHECK-NEXT:    ld %s20, 64(, %s9) # 8-byte Folded Reload
; CHECK-NEXT:    ld %s19, 56(, %s9) # 8-byte Folded Reload
; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
; CHECK-NEXT:    or %s11, 0, %s9
; CHECK-NEXT:    ld %s17, 40(, %s11)
; CHECK-NEXT:    ld %s10, 8(, %s11)
; CHECK-NEXT:    ld %s9, (, %s11)
; CHECK-NEXT:    b.l.t (, %s10)
  ; %3 is the fixed-size local with 32-byte alignment.
  %3 = alloca fp128, align 32
  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
  ; %4 is the dynamically sized allocation of %1 bytes.
  %4 = alloca i8, i64 %1, align 16
  ; The calls come before the stores, so %0, %1 and %4 are all still
  ; needed after a call.
  tail call void (...) @dummy()
  tail call void @pass(i64 noundef %1)
  store volatile fp128 %0, ptr %4, align 16, !tbaa !12
  store volatile fp128 %0, ptr %3, align 32, !tbaa !16
  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
  ret void
}

;;; Metadata referenced by the functions above.
;;;   !3  - TBAA tag attached to the i64 ("long") stores.
;;;   !10 - TBAA tag attached to the over-aligned i64 stores.
;;;   !12 - TBAA tag attached to the fp128 ("long double") stores.
;;;   !16 - TBAA tag attached to the over-aligned fp128 stores.
;;;   !7, !9, !14, !15 - distinct loop metadata nodes for the four array
;;;                      loops, each carrying llvm.loop.mustprogress (!8).
!3 = !{!4, !4, i64 0}
!4 = !{!"long", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.mustprogress"}
!9 = distinct !{!9, !8}
!10 = !{!11, !4, i64 0}
!11 = !{!"", !4, i64 0}
!12 = !{!13, !13, i64 0}
!13 = !{!"long double", !5, i64 0}
!14 = distinct !{!14, !8}
!15 = distinct !{!15, !8}
!16 = !{!17, !13, i64 0}
!17 = !{!"", !13, i64 0}