; llvm/test/CodeGen/AArch64/stack-hazard.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefixes=CHECK,CHECK0
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=64 | FileCheck %s --check-prefixes=CHECK,CHECK64
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024

; Trivial function with no stack usage: a single shared CHECK prefix covers all
; three RUN lines, i.e. the output is identical for hazard sizes 0, 64 and 1024.
define i32 @basic(i32 noundef %num) {
; CHECK-LABEL: basic:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
entry:
  ret i32 0
}

; Non-streaming functions don't need hazards
; (no "aarch64_pstate_sm_compatible" attribute here): d8 is spilled directly at
; sp with no padding, and the shared CHECK prefix shows the same layout for all
; three hazard sizes.
define i32 @csr_d8_notsc(i32 noundef %num) {
; CHECK-LABEL: csr_d8_notsc:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset b8, -16
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8}"() #1
  ret i32 0
}

; Very simple - doesn't require hazards
; (streaming-compatible, but no FPR accesses and no stack objects at all).
define i32 @basic_sc(i32 noundef %num) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: basic_sc:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
entry:
  ret i32 0
}

; No fpr accesses/csrs - doesn't require hazards
; (only a GPR stack object, so there is no FPR/GPR adjacency to pad: the
; frame stays 16 bytes for every hazard size, per the shared CHECK prefix).
define i32 @nocsr_alloci64(i64 %d) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: nocsr_alloci64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x8, x0
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    str x8, [sp, #8]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
entry:
  %a = alloca i64
  store i64 %d, ptr %a
  ret i32 0
}

; No fpr accesses/csrs - doesn't require hazards
; (x20 is a GPR callee-save; with no FPR spills in the frame, no hazard gap is
; inserted for any hazard size — single shared CHECK prefix).
define i32 @csr_x20(i32 noundef %num) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: csr_x20:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x20, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    ldr x20, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{x20}"() #1
  ret i32 0
}

; CSR of d8. Make sure there is a gap between FPR and GPR
; spill areas: at hazard size 0 d8 sits at sp directly; at 64 the frame grows
; to 144 with d8 at sp+64; at 1024 the prologue splits into two sp adjustments
; with d8 at the bottom, 1024+ bytes below the GPR area (x29 at sp+1032).
define i32 @csr_d8(i32 noundef %num) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #144
; CHECK64-NEXT:    str d8, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 144
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldr d8, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #144
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2064
; CHECK1024-NEXT:    .cfi_offset w29, -8
; CHECK1024-NEXT:    .cfi_offset b8, -1040
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8}"() #1
  ret i32 0
}

; Stack fpr objects.
; A local double (FPR-accessed stack object) with no callee-saves: the frame is
; padded per hazard size (16 -> 80 -> 2080), keeping the fpr store away from
; the GPR region of the stack.
define i32 @nocsr_allocd(double %d) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: nocsr_allocd:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    sub sp, sp, #16
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    str d0, [sp, #8]
; CHECK0-NEXT:    add sp, sp, #16
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: nocsr_allocd:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    .cfi_def_cfa_offset 80
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    str d0, [sp, #72]
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: nocsr_allocd:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    str d0, [sp, #1032]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca double
  store double %d, ptr %a
  ret i32 0
}

define i32 @csr_d8d9(i32 noundef %num) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8d9:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    stp d9, d8, [sp, #-16]! // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset b8, -8
; CHECK0-NEXT:    .cfi_offset b9, -16
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldp d9, d8, [sp], #16 // 16-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8d9:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #144
; CHECK64-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 144
; CHECK64-NEXT:    .cfi_offset b8, -72
; CHECK64-NEXT:    .cfi_offset b9, -80
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #144
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8d9:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1056
; CHECK1024-NEXT:    stp d9, d8, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_offset b8, -1048
; CHECK1024-NEXT:    .cfi_offset b9, -1056
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldp d9, d8, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1056
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8},~{d9}"() #1
  ret i32 0
}

define i32 @csr_d8_allocd(double %d) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8_allocd:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    stp d8, d0, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8_allocd:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #160
; CHECK64-NEXT:    stp d0, d8, [sp, #72] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 160
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #160
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8_allocd:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
; CHECK1024-NEXT:    .cfi_offset w29, -8
; CHECK1024-NEXT:    .cfi_offset b8, -1040
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str d0, [sp, #1032]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca double
  tail call void asm sideeffect "", "~{d8}"() #1
  store double %d, ptr %a
  ret i32 0
}

define i32 @csr_d8_alloci64(i64 %d) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8_alloci64:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    mov x8, x0
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    str x8, [sp, #8]
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8_alloci64:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #160
; CHECK64-NEXT:    str d8, [sp, #80] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 160
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    mov x8, x0
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT:    str x8, [sp, #8]
; CHECK64-NEXT:    add sp, sp, #160
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8_alloci64:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
; CHECK1024-NEXT:    .cfi_offset w29, -8
; CHECK1024-NEXT:    .cfi_offset b8, -1040
; CHECK1024-NEXT:    mov x8, x0
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str x8, [sp, #8]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca i64
  tail call void asm sideeffect "", "~{d8}"() #1
  store i64 %d, ptr %a
  ret i32 0
}

; Check the frame pointer is in the right place
; ("frame-pointer"="all"): x29 must point into the GPR region above the hazard
; gap, so the double alloca is addressed FP-relative (stur d0, [x29, #-8]) in
; the padded variants.
define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
; CHECK0-LABEL: csr_d8_allocd_framepointer:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
; CHECK0-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT:    add x29, sp, #16
; CHECK0-NEXT:    .cfi_def_cfa w29, 16
; CHECK0-NEXT:    .cfi_offset w30, -8
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_offset b8, -32
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    str d0, [sp, #8]
; CHECK0-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8_allocd_framepointer:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #176
; CHECK64-NEXT:    str d8, [sp, #80] // 8-byte Folded Spill
; CHECK64-NEXT:    stp x29, x30, [sp, #152] // 16-byte Folded Spill
; CHECK64-NEXT:    add x29, sp, #80
; CHECK64-NEXT:    .cfi_def_cfa w29, 96
; CHECK64-NEXT:    .cfi_offset w30, -16
; CHECK64-NEXT:    .cfi_offset w29, -24
; CHECK64-NEXT:    .cfi_offset b8, -96
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    stur d0, [x29, #-8]
; CHECK64-NEXT:    ldr x29, [sp, #152] // 8-byte Folded Reload
; CHECK64-NEXT:    ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    add sp, sp, #176
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8_allocd_framepointer:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1056
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    mov x29, sp
; CHECK1024-NEXT:    str x30, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_def_cfa w29, 1056
; CHECK1024-NEXT:    .cfi_offset w30, -16
; CHECK1024-NEXT:    .cfi_offset w29, -24
; CHECK1024-NEXT:    .cfi_offset b8, -1056
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    stur d0, [x29, #-8]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr x30, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1056
; CHECK1024-NEXT:    ret
entry:
  %a = alloca double
  tail call void asm sideeffect "", "~{d8}"() #1
  store double %d, ptr %a
  ret i32 0
}

; sve stack objects should live with other fpr registers
; (the scalable <vscale x 4 x i32> alloca sits in the SVE area next to the d8
; spill; the scalable part of the frame is expressed via .cfi_escape with a
; "+ 8 * VG" term).
define i32 @csr_d8_allocnxv4i32(i64 %d) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8_allocnxv4i32:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    str x29, [sp, #8] // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK0-NEXT:    .cfi_offset w29, -8
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    mov z0.s, #0 // =0x0
; CHECK0-NEXT:    ptrue p0.s
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr x29, [sp, #8] // 8-byte Folded Reload
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8_allocnxv4i32:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    str d8, [sp, #-80]! // 8-byte Folded Spill
; CHECK64-NEXT:    str x29, [sp, #72] // 8-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #64
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 8 * VG
; CHECK64-NEXT:    .cfi_offset w29, -8
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    mov z0.s, #0 // =0x0
; CHECK64-NEXT:    ptrue p0.s
; CHECK64-NEXT:    add x8, sp, #64
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    add sp, sp, #64
; CHECK64-NEXT:    ldr x29, [sp, #72] // 8-byte Folded Reload
; CHECK64-NEXT:    ldr d8, [sp], #80 // 8-byte Folded Reload
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8_allocnxv4i32:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 8 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -8
; CHECK1024-NEXT:    .cfi_offset b8, -1040
; CHECK1024-NEXT:    mov z0.s, #0 // =0x0
; CHECK1024-NEXT:    ptrue p0.s
; CHECK1024-NEXT:    add x8, sp, #1024
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca <vscale x 4 x i32>
  tail call void asm sideeffect "", "~{d8}"() #1
  store <vscale x 4 x i32> zeroinitializer, ptr %a
  ret i32 0
}

define i32 @csr_x18_25_d8_15_allocdi64(i64 %d, double %e) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_x18_25_d8_15_allocdi64:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    sub sp, sp, #144
; CHECK0-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK0-NEXT:    str x25, [sp, #80] // 8-byte Folded Spill
; CHECK0-NEXT:    stp x24, x23, [sp, #96] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x22, x21, [sp, #112] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x20, x19, [sp, #128] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 144
; CHECK0-NEXT:    .cfi_offset w19, -8
; CHECK0-NEXT:    .cfi_offset w20, -16
; CHECK0-NEXT:    .cfi_offset w21, -24
; CHECK0-NEXT:    .cfi_offset w22, -32
; CHECK0-NEXT:    .cfi_offset w23, -40
; CHECK0-NEXT:    .cfi_offset w24, -48
; CHECK0-NEXT:    .cfi_offset w25, -64
; CHECK0-NEXT:    .cfi_offset b8, -72
; CHECK0-NEXT:    .cfi_offset b9, -80
; CHECK0-NEXT:    .cfi_offset b10, -88
; CHECK0-NEXT:    .cfi_offset b11, -96
; CHECK0-NEXT:    .cfi_offset b12, -104
; CHECK0-NEXT:    .cfi_offset b13, -112
; CHECK0-NEXT:    .cfi_offset b14, -120
; CHECK0-NEXT:    .cfi_offset b15, -128
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    mov x8, x0
; CHECK0-NEXT:    ldp x20, x19, [sp, #128] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr x25, [sp, #80] // 8-byte Folded Reload
; CHECK0-NEXT:    ldp x22, x21, [sp, #112] // 16-byte Folded Reload
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    ldp x24, x23, [sp, #96] // 16-byte Folded Reload
; CHECK0-NEXT:    str x8, [sp, #88]
; CHECK0-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK0-NEXT:    str d0, [sp, #8]
; CHECK0-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK0-NEXT:    add sp, sp, #144
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_x18_25_d8_15_allocdi64:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #288
; CHECK64-NEXT:    stp d15, d14, [sp, #96] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d13, d12, [sp, #112] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d11, d10, [sp, #128] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d9, d8, [sp, #144] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x29, x25, [sp, #224] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x24, x23, [sp, #240] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x22, x21, [sp, #256] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x20, x19, [sp, #272] // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 288
; CHECK64-NEXT:    .cfi_offset w19, -8
; CHECK64-NEXT:    .cfi_offset w20, -16
; CHECK64-NEXT:    .cfi_offset w21, -24
; CHECK64-NEXT:    .cfi_offset w22, -32
; CHECK64-NEXT:    .cfi_offset w23, -40
; CHECK64-NEXT:    .cfi_offset w24, -48
; CHECK64-NEXT:    .cfi_offset w25, -56
; CHECK64-NEXT:    .cfi_offset w29, -64
; CHECK64-NEXT:    .cfi_offset b8, -136
; CHECK64-NEXT:    .cfi_offset b9, -144
; CHECK64-NEXT:    .cfi_offset b10, -152
; CHECK64-NEXT:    .cfi_offset b11, -160
; CHECK64-NEXT:    .cfi_offset b12, -168
; CHECK64-NEXT:    .cfi_offset b13, -176
; CHECK64-NEXT:    .cfi_offset b14, -184
; CHECK64-NEXT:    .cfi_offset b15, -192
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    mov x8, x0
; CHECK64-NEXT:    ldp x20, x19, [sp, #272] // 16-byte Folded Reload
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldp x22, x21, [sp, #256] // 16-byte Folded Reload
; CHECK64-NEXT:    str x8, [sp, #8]
; CHECK64-NEXT:    ldp x24, x23, [sp, #240] // 16-byte Folded Reload
; CHECK64-NEXT:    str d0, [sp, #88]
; CHECK64-NEXT:    ldp x29, x25, [sp, #224] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d13, d12, [sp, #112] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d15, d14, [sp, #96] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #288
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_x18_25_d8_15_allocdi64:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1152
; CHECK1024-NEXT:    stp d15, d14, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1088] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x25, [sp, #1096] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x24, [sp, #1104] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x23, [sp, #1112] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x22, [sp, #1120] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x21, [sp, #1128] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x20, [sp, #1136] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x19, [sp, #1144] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1056
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2208
; CHECK1024-NEXT:    .cfi_offset w19, -8
; CHECK1024-NEXT:    .cfi_offset w20, -16
; CHECK1024-NEXT:    .cfi_offset w21, -24
; CHECK1024-NEXT:    .cfi_offset w22, -32
; CHECK1024-NEXT:    .cfi_offset w23, -40
; CHECK1024-NEXT:    .cfi_offset w24, -48
; CHECK1024-NEXT:    .cfi_offset w25, -56
; CHECK1024-NEXT:    .cfi_offset w29, -64
; CHECK1024-NEXT:    .cfi_offset b8, -1096
; CHECK1024-NEXT:    .cfi_offset b9, -1104
; CHECK1024-NEXT:    .cfi_offset b10, -1112
; CHECK1024-NEXT:    .cfi_offset b11, -1120
; CHECK1024-NEXT:    .cfi_offset b12, -1128
; CHECK1024-NEXT:    .cfi_offset b13, -1136
; CHECK1024-NEXT:    .cfi_offset b14, -1144
; CHECK1024-NEXT:    .cfi_offset b15, -1152
; CHECK1024-NEXT:    mov x8, x0
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str x8, [sp, #8]
; CHECK1024-NEXT:    str d0, [sp, #1048]
; CHECK1024-NEXT:    add sp, sp, #1056
; CHECK1024-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x19, [sp, #1144] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x20, [sp, #1136] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x21, [sp, #1128] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x22, [sp, #1120] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x23, [sp, #1112] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x24, [sp, #1104] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x25, [sp, #1096] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1088] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldp d15, d14, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1152
; CHECK1024-NEXT:    ret
entry:
  %a = alloca i64
  %b = alloca double
  tail call void asm sideeffect "", "~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25}"()
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"()
  store i64 %d, ptr %a
  store double %e, ptr %b
  ret i32 0
}

define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarch64_pstate_sm_body" "target-features"="+sme" {
; CHECK0-LABEL: csr_x18_25_d8_15_allocdi64_locallystreaming:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    sub sp, sp, #176
; CHECK0-NEXT:    .cfi_def_cfa_offset 176
; CHECK0-NEXT:    rdsvl x9, #1
; CHECK0-NEXT:    stp d15, d14, [sp, #48] // 16-byte Folded Spill
; CHECK0-NEXT:    lsr x9, x9, #3
; CHECK0-NEXT:    stp d13, d12, [sp, #64] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d11, d10, [sp, #80] // 16-byte Folded Spill
; CHECK0-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
; CHECK0-NEXT:    cntd x9
; CHECK0-NEXT:    str x9, [sp, #40] // 8-byte Folded Spill
; CHECK0-NEXT:    stp d9, d8, [sp, #96] // 16-byte Folded Spill
; CHECK0-NEXT:    str x25, [sp, #112] // 8-byte Folded Spill
; CHECK0-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_offset w19, -8
; CHECK0-NEXT:    .cfi_offset w20, -16
; CHECK0-NEXT:    .cfi_offset w21, -24
; CHECK0-NEXT:    .cfi_offset w22, -32
; CHECK0-NEXT:    .cfi_offset w23, -40
; CHECK0-NEXT:    .cfi_offset w24, -48
; CHECK0-NEXT:    .cfi_offset w25, -64
; CHECK0-NEXT:    .cfi_offset b8, -72
; CHECK0-NEXT:    .cfi_offset b9, -80
; CHECK0-NEXT:    .cfi_offset b10, -88
; CHECK0-NEXT:    .cfi_offset b11, -96
; CHECK0-NEXT:    .cfi_offset b12, -104
; CHECK0-NEXT:    .cfi_offset b13, -112
; CHECK0-NEXT:    .cfi_offset b14, -120
; CHECK0-NEXT:    .cfi_offset b15, -128
; CHECK0-NEXT:    .cfi_offset vg, -136
; CHECK0-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK0-NEXT:    smstart sm
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK0-NEXT:    str x0, [sp, #24]
; CHECK0-NEXT:    str d0, [sp, #16]
; CHECK0-NEXT:    smstop sm
; CHECK0-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr x25, [sp, #112] // 8-byte Folded Reload
; CHECK0-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d9, d8, [sp, #96] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d11, d10, [sp, #80] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d13, d12, [sp, #64] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d15, d14, [sp, #48] // 16-byte Folded Reload
; CHECK0-NEXT:    add sp, sp, #176
; CHECK0-NEXT:    .cfi_def_cfa_offset 0
; CHECK0-NEXT:    .cfi_restore w19
; CHECK0-NEXT:    .cfi_restore w20
; CHECK0-NEXT:    .cfi_restore w21
; CHECK0-NEXT:    .cfi_restore w22
; CHECK0-NEXT:    .cfi_restore w23
; CHECK0-NEXT:    .cfi_restore w24
; CHECK0-NEXT:    .cfi_restore w25
; CHECK0-NEXT:    .cfi_restore b8
; CHECK0-NEXT:    .cfi_restore b9
; CHECK0-NEXT:    .cfi_restore b10
; CHECK0-NEXT:    .cfi_restore b11
; CHECK0-NEXT:    .cfi_restore b12
; CHECK0-NEXT:    .cfi_restore b13
; CHECK0-NEXT:    .cfi_restore b14
; CHECK0-NEXT:    .cfi_restore b15
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_x18_25_d8_15_allocdi64_locallystreaming:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #304
; CHECK64-NEXT:    .cfi_def_cfa_offset 304
; CHECK64-NEXT:    rdsvl x9, #1
; CHECK64-NEXT:    stp d15, d14, [sp, #112] // 16-byte Folded Spill
; CHECK64-NEXT:    lsr x9, x9, #3
; CHECK64-NEXT:    stp d13, d12, [sp, #128] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d11, d10, [sp, #144] // 16-byte Folded Spill
; CHECK64-NEXT:    str x9, [sp, #96] // 8-byte Folded Spill
; CHECK64-NEXT:    cntd x9
; CHECK64-NEXT:    str x9, [sp, #104] // 8-byte Folded Spill
; CHECK64-NEXT:    stp d9, d8, [sp, #160] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x29, x25, [sp, #240] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x24, x23, [sp, #256] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x22, x21, [sp, #272] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x20, x19, [sp, #288] // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_offset w19, -8
; CHECK64-NEXT:    .cfi_offset w20, -16
; CHECK64-NEXT:    .cfi_offset w21, -24
; CHECK64-NEXT:    .cfi_offset w22, -32
; CHECK64-NEXT:    .cfi_offset w23, -40
; CHECK64-NEXT:    .cfi_offset w24, -48
; CHECK64-NEXT:    .cfi_offset w25, -56
; CHECK64-NEXT:    .cfi_offset w29, -64
; CHECK64-NEXT:    .cfi_offset b8, -136
; CHECK64-NEXT:    .cfi_offset b9, -144
; CHECK64-NEXT:    .cfi_offset b10, -152
; CHECK64-NEXT:    .cfi_offset b11, -160
; CHECK64-NEXT:    .cfi_offset b12, -168
; CHECK64-NEXT:    .cfi_offset b13, -176
; CHECK64-NEXT:    .cfi_offset b14, -184
; CHECK64-NEXT:    .cfi_offset b15, -192
; CHECK64-NEXT:    .cfi_offset vg, -200
; CHECK64-NEXT:    str d0, [sp, #80] // 8-byte Folded Spill
; CHECK64-NEXT:    smstart sm
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    ldr d0, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT:    str x0, [sp, #8]
; CHECK64-NEXT:    str d0, [sp, #88]
; CHECK64-NEXT:    smstop sm
; CHECK64-NEXT:    ldp x20, x19, [sp, #288] // 16-byte Folded Reload
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldp x22, x21, [sp, #272] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp x24, x23, [sp, #256] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp x29, x25, [sp, #240] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d9, d8, [sp, #160] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d11, d10, [sp, #144] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d13, d12, [sp, #128] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d15, d14, [sp, #112] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #304
; CHECK64-NEXT:    .cfi_def_cfa_offset 0
; CHECK64-NEXT:    .cfi_restore w19
; CHECK64-NEXT:    .cfi_restore w20
; CHECK64-NEXT:    .cfi_restore w21
; CHECK64-NEXT:    .cfi_restore w22
; CHECK64-NEXT:    .cfi_restore w23
; CHECK64-NEXT:    .cfi_restore w24
; CHECK64-NEXT:    .cfi_restore w25
; CHECK64-NEXT:    .cfi_restore w29
; CHECK64-NEXT:    .cfi_restore b8
; CHECK64-NEXT:    .cfi_restore b9
; CHECK64-NEXT:    .cfi_restore b10
; CHECK64-NEXT:    .cfi_restore b11
; CHECK64-NEXT:    .cfi_restore b12
; CHECK64-NEXT:    .cfi_restore b13
; CHECK64-NEXT:    .cfi_restore b14
; CHECK64-NEXT:    .cfi_restore b15
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_x18_25_d8_15_allocdi64_locallystreaming:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    rdsvl x9, #1
; CHECK1024-NEXT:    lsr x9, x9, #3
; CHECK1024-NEXT:    sub sp, sp, #1168
; CHECK1024-NEXT:    .cfi_def_cfa_offset 1168
; CHECK1024-NEXT:    str x9, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    cntd x9
; CHECK1024-NEXT:    str x9, [sp, #8] // 8-byte Folded Spill
; CHECK1024-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1104] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x25, [sp, #1112] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x24, [sp, #1120] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x23, [sp, #1128] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x22, [sp, #1136] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x21, [sp, #1144] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x20, [sp, #1152] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x19, [sp, #1160] // 8-byte Folded Spill
; CHECK1024-NEXT:    .cfi_offset w19, -8
; CHECK1024-NEXT:    .cfi_offset w20, -16
; CHECK1024-NEXT:    .cfi_offset w21, -24
; CHECK1024-NEXT:    .cfi_offset w22, -32
; CHECK1024-NEXT:    .cfi_offset w23, -40
; CHECK1024-NEXT:    .cfi_offset w24, -48
; CHECK1024-NEXT:    .cfi_offset w25, -56
; CHECK1024-NEXT:    .cfi_offset w29, -64
; CHECK1024-NEXT:    .cfi_offset b8, -1096
; CHECK1024-NEXT:    .cfi_offset b9, -1104
; CHECK1024-NEXT:    .cfi_offset b10, -1112
; CHECK1024-NEXT:    .cfi_offset b11, -1120
; CHECK1024-NEXT:    .cfi_offset b12, -1128
; CHECK1024-NEXT:    .cfi_offset b13, -1136
; CHECK1024-NEXT:    .cfi_offset b14, -1144
; CHECK1024-NEXT:    .cfi_offset b15, -1152
; CHECK1024-NEXT:    .cfi_offset vg, -1160
; CHECK1024-NEXT:    sub sp, sp, #1056
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2224
; CHECK1024-NEXT:    str d0, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT:    smstart sm
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    ldr d0, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT:    str x0, [sp, #8]
; CHECK1024-NEXT:    str d0, [sp, #1048]
; CHECK1024-NEXT:    smstop sm
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    add sp, sp, #1056
; CHECK1024-NEXT:    .cfi_def_cfa_offset 1168
; CHECK1024-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x19, [sp, #1160] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x20, [sp, #1152] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x21, [sp, #1144] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x22, [sp, #1136] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x23, [sp, #1128] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x24, [sp, #1120] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x25, [sp, #1112] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1104] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1168
; CHECK1024-NEXT:    .cfi_def_cfa_offset 0
; CHECK1024-NEXT:    .cfi_restore w19
; CHECK1024-NEXT:    .cfi_restore w20
; CHECK1024-NEXT:    .cfi_restore w21
; CHECK1024-NEXT:    .cfi_restore w22
; CHECK1024-NEXT:    .cfi_restore w23
; CHECK1024-NEXT:    .cfi_restore w24
; CHECK1024-NEXT:    .cfi_restore w25
; CHECK1024-NEXT:    .cfi_restore w29
; CHECK1024-NEXT:    .cfi_restore b8
; CHECK1024-NEXT:    .cfi_restore b9
; CHECK1024-NEXT:    .cfi_restore b10
; CHECK1024-NEXT:    .cfi_restore b11
; CHECK1024-NEXT:    .cfi_restore b12
; CHECK1024-NEXT:    .cfi_restore b13
; CHECK1024-NEXT:    .cfi_restore b14
; CHECK1024-NEXT:    .cfi_restore b15
; CHECK1024-NEXT:    ret
entry:
  %a = alloca i64
  %b = alloca double
  tail call void asm sideeffect "", "~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25}"()
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"()
  store i64 %d, ptr %a
  store double %e, ptr %b
  ret i32 0
}

; We don't currently handle fpr stack arguments very well (they are hopefully relatively rare).
; %i is the ninth float argument, so it is passed on the stack; with no CSRs and
; no locals it is loaded directly from [sp], identically for every hazard size
; (hence a single shared CHECK block).
define float @nocsr_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: nocsr_stackargs:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [sp]
; CHECK-NEXT:    ret
entry:
  ret float %i
}

; A GPR-only CSR (x20) together with an FPR stack argument: GPR saves need no
; hazard padding, so all three hazard sizes share one CHECK block and %i is
; loaded from just above the 16-byte x20 spill slot.
define float @csr_x20_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: csr_x20_stackargs:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x20, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    ldr s0, [sp, #16]
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    ldr x20, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{x20}"() #1
  ret float %i
}

; An FPR CSR (d8) with an FPR stack argument: with a nonzero hazard size the d8
; spill is kept apart from GPR accesses, so CHECK64/CHECK1024 build a padded
; frame (144/2064-byte total SP adjustment) and load the stack argument at the
; full frame offset. CHECK1024 additionally spills x29.
define float @csr_d8_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8_stackargs:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    ldr s0, [sp, #16]
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8_stackargs:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #144
; CHECK64-NEXT:    str d8, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 144
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    ldr s0, [sp, #144]
; CHECK64-NEXT:    ldr d8, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #144
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8_stackargs:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2064
; CHECK1024-NEXT:    .cfi_offset w29, -8
; CHECK1024-NEXT:    .cfi_offset b8, -1040
; CHECK1024-NEXT:    ldr s0, [sp, #2064]
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8}"() #1
  ret float %i
}

; SVE calling conventions
; A scalable-vector argument on its own requires no stack frame, so the output
; is identical for all hazard sizes.
define i32 @svecc_basic(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: svecc_basic:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
entry:
  ret i32 0
}

; Only x20 is clobbered: a pure GPR save needs no hazard padding even with an
; SVE vector argument present, so one CHECK block covers all hazard sizes.
define i32 @svecc_csr_x20(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: svecc_csr_x20:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x20, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    ldr x20, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{x20}"() #1
  ret i32 0
}

; Clobbering d8 in a function with an SVE argument saves it as a full z8
; register in the SVE spill area (addvl + str z8). With a nonzero hazard size,
; extra space is allocated both above and below that area — visible as the
; sub/add sp, sp, #64 (or #1024) pairs surrounding the addvl adjustments.
define i32 @svecc_csr_d8(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_d8:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_d8:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #64
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 8 * VG
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    add sp, sp, #64
; CHECK64-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_d8:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 8 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8}"() #1
  ret i32 0
}

; As svecc_csr_d8, but clobbering both d8 and d9: two z-register spill slots
; are allocated (addvl sp, sp, #-2), with hazard padding around the SVE area
; for the nonzero hazard sizes.
define i32 @svecc_csr_d8d9(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_d8d9:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-2
; CHECK0-NEXT:    str z9, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    ldr z9, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    addvl sp, sp, #2
; CHECK0-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_d8d9:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-2
; CHECK64-NEXT:    str z9, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #64
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 80 - 16 * VG
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    add sp, sp, #64
; CHECK64-NEXT:    ldr z9, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #2
; CHECK64-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_d8d9:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-2
; CHECK1024-NEXT:    str z9, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 16 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1040 - 16 * VG
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr z9, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #2
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8},~{d9}"() #1
  ret i32 0
}

; As svecc_csr_d8, plus a double alloca: the incoming %d is stored into the
; local area below the SVE spills (e.g. CHECK64 stores d0 at [sp, #72] inside
; the lower 80-byte allocation).
define i32 @svecc_csr_d8_allocd(double %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_d8_allocd:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    addvl x8, sp, #1
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    str d0, [x8, #8]
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_d8_allocd:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 160 + 8 * VG
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    str d0, [sp, #72]
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_d8_allocd:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2080 + 8 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str d0, [sp, #1032]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca double
  tail call void asm sideeffect "", "~{d8}"() #1
  store double %d, ptr %a
  ret i32 0
}

; As svecc_csr_d8, plus an i64 alloca: the incoming %d is stored via a GPR
; (str x8), landing at [sp, #8] in the local area for CHECK64/CHECK1024.
define i32 @svecc_csr_d8_alloci64(i64 %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_d8_alloci64:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    mov x8, x0
; CHECK0-NEXT:    addvl x9, sp, #1
; CHECK0-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    str x8, [x9, #8]
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_d8_alloci64:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 160 + 8 * VG
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG
; CHECK64-NEXT:    mov x8, x0
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    str x8, [sp, #8]
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_d8_alloci64:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2080 + 8 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG
; CHECK1024-NEXT:    mov x8, x0
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str x8, [sp, #8]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca i64
  tail call void asm sideeffect "", "~{d8}"() #1
  store i64 %d, ptr %a
  ret i32 0
}

; As svecc_csr_d8, plus a scalable <vscale x 4 x i32> alloca: a second
; addvl sp, sp, #-1 allocates the local, which is zero-filled with st1w; for
; the nonzero hazard sizes the local's address is formed above the fixed
; padding (add x8, sp, #64 / #1024 before the store).
define i32 @svecc_csr_d8_allocnxv4i32(i64 %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_d8_allocnxv4i32:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK0-NEXT:    mov z0.s, #0 // =0x0
; CHECK0-NEXT:    ptrue p0.s
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_d8_allocnxv4i32:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #64
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG
; CHECK64-NEXT:    mov z0.s, #0 // =0x0
; CHECK64-NEXT:    ptrue p0.s
; CHECK64-NEXT:    add x8, sp, #64
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK64-NEXT:    add sp, sp, #64
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_d8_allocnxv4i32:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 16 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG
; CHECK1024-NEXT:    mov z0.s, #0 // =0x0
; CHECK1024-NEXT:    ptrue p0.s
; CHECK1024-NEXT:    add x8, sp, #1024
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca <vscale x 4 x i32>
  tail call void asm sideeffect "", "~{d8}"() #1
  store <vscale x 4 x i32> zeroinitializer, ptr %a
  ret i32 0
}

define i32 @svecc_csr_x18_25_d8_15_allocdi64(i64 %d, double %e, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_x18_25_d8_15_allocdi64:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    stp x29, x25, [sp, #-64]! // 16-byte Folded Spill
; CHECK0-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-8
; CHECK0-NEXT:    str z15, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    str z14, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z13, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z12, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z11, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z10, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z9, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z8, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    sub sp, sp, #16
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 64 * VG
; CHECK0-NEXT:    .cfi_offset w19, -8
; CHECK0-NEXT:    .cfi_offset w20, -16
; CHECK0-NEXT:    .cfi_offset w21, -24
; CHECK0-NEXT:    .cfi_offset w22, -32
; CHECK0-NEXT:    .cfi_offset w23, -40
; CHECK0-NEXT:    .cfi_offset w24, -48
; CHECK0-NEXT:    .cfi_offset w25, -56
; CHECK0-NEXT:    .cfi_offset w29, -64
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG
; CHECK0-NEXT:    mov x8, x0
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    str x8, [sp, #8]
; CHECK0-NEXT:    str d0, [sp], #16
; CHECK0-NEXT:    ldr z15, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z14, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z13, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z12, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z11, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z10, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z9, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z8, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    addvl sp, sp, #8
; CHECK0-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp x29, x25, [sp], #64 // 16-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_x18_25_d8_15_allocdi64:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #128
; CHECK64-NEXT:    stp x29, x25, [sp, #64] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-8
; CHECK64-NEXT:    str z15, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    str z14, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z13, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z12, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z11, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z10, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z9, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z8, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #96
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x01, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 224 + 64 * VG
; CHECK64-NEXT:    .cfi_offset w19, -8
; CHECK64-NEXT:    .cfi_offset w20, -16
; CHECK64-NEXT:    .cfi_offset w21, -24
; CHECK64-NEXT:    .cfi_offset w22, -32
; CHECK64-NEXT:    .cfi_offset w23, -40
; CHECK64-NEXT:    .cfi_offset w24, -48
; CHECK64-NEXT:    .cfi_offset w25, -56
; CHECK64-NEXT:    .cfi_offset w29, -64
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG
; CHECK64-NEXT:    mov x8, x0
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    str x8, [sp, #8]
; CHECK64-NEXT:    str d0, [sp, #88]
; CHECK64-NEXT:    add sp, sp, #96
; CHECK64-NEXT:    ldr z15, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z14, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z13, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z12, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z11, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z10, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z9, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z8, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #8
; CHECK64-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp x29, x25, [sp, #64] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #128
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_x18_25_d8_15_allocdi64:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1088
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x25, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x24, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x23, [sp, #1048] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x22, [sp, #1056] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x21, [sp, #1064] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x20, [sp, #1072] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x19, [sp, #1080] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-8
; CHECK1024-NEXT:    str z15, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z14, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z13, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z12, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z11, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z10, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z9, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z8, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1056
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2144 + 64 * VG
; CHECK1024-NEXT:    .cfi_offset w19, -8
; CHECK1024-NEXT:    .cfi_offset w20, -16
; CHECK1024-NEXT:    .cfi_offset w21, -24
; CHECK1024-NEXT:    .cfi_offset w22, -32
; CHECK1024-NEXT:    .cfi_offset w23, -40
; CHECK1024-NEXT:    .cfi_offset w24, -48
; CHECK1024-NEXT:    .cfi_offset w25, -56
; CHECK1024-NEXT:    .cfi_offset w29, -64
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG
; CHECK1024-NEXT:    mov x8, x0
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str x8, [sp, #8]
; CHECK1024-NEXT:    str d0, [sp, #1048]
; CHECK1024-NEXT:    add sp, sp, #1056
; CHECK1024-NEXT:    ldr z15, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z14, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z13, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z12, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z11, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z10, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z9, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z8, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #8
; CHECK1024-NEXT:    ldr x19, [sp, #1080] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x20, [sp, #1072] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x21, [sp, #1064] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x22, [sp, #1056] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x23, [sp, #1048] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x24, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x25, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1088
; CHECK1024-NEXT:    ret
entry:
  %a = alloca i64
  %b = alloca double
  tail call void asm sideeffect "", "~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25}"()
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"()
  store i64 %d, ptr %a
  store double %e, ptr %b
  ret i32 0
}


; Returning a [2 x <vscale x 4 x i1>] aggregate only shuffles predicate
; registers (p2/p3 -> p0/p1), so no stack frame is created and the expected
; output is identical for all hazard-padding sizes (shared CHECK prefix).
define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1>] %arg1, [2 x <vscale x 4 x i1>] %arg2) nounwind "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: sve_signature_pred_2xv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov p1.b, p3.b
; CHECK-NEXT:    mov p0.b, p2.b
; CHECK-NEXT:    ret
  ret [2 x <vscale x 4 x i1>] %arg2
}

; Calling the function above forces p4/p5 to be spilled as callee-saves in an
; SVE (scalable) stack region.  With hazard padding enabled (CHECK64/CHECK1024)
; the GPR spills (x29/x30) are separated from that SVE region by an extra
; fixed-size allocation (#80 / #1040 vs #16 for CHECK0), and a further padding
; allocation (#64 / #1024) is made below the SVE area around the call.
define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1_caller([2 x <vscale x 4 x i1>] %arg1, [2 x <vscale x 4 x i1>] %arg2) nounwind "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: sve_signature_pred_2xv4i1_caller:
; CHECK0:       // %bb.0:
; CHECK0-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    mov p5.b, p0.b
; CHECK0-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    mov p4.b, p1.b
; CHECK0-NEXT:    mov p0.b, p2.b
; CHECK0-NEXT:    mov p1.b, p3.b
; CHECK0-NEXT:    mov p2.b, p5.b
; CHECK0-NEXT:    mov p3.b, p4.b
; CHECK0-NEXT:    bl sve_signature_pred_2xv4i1
; CHECK0-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: sve_signature_pred_2xv4i1_caller:
; CHECK64:       // %bb.0:
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #64
; CHECK64-NEXT:    mov p4.b, p1.b
; CHECK64-NEXT:    mov p5.b, p0.b
; CHECK64-NEXT:    mov p0.b, p2.b
; CHECK64-NEXT:    mov p1.b, p3.b
; CHECK64-NEXT:    mov p2.b, p5.b
; CHECK64-NEXT:    mov p3.b, p4.b
; CHECK64-NEXT:    bl sve_signature_pred_2xv4i1
; CHECK64-NEXT:    add sp, sp, #64
; CHECK64-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: sve_signature_pred_2xv4i1_caller:
; CHECK1024:       // %bb.0:
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x30, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    mov p4.b, p1.b
; CHECK1024-NEXT:    mov p5.b, p0.b
; CHECK1024-NEXT:    mov p0.b, p2.b
; CHECK1024-NEXT:    mov p1.b, p3.b
; CHECK1024-NEXT:    mov p2.b, p5.b
; CHECK1024-NEXT:    mov p3.b, p4.b
; CHECK1024-NEXT:    bl sve_signature_pred_2xv4i1
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr x30, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
  ; Swap the two aggregate arguments through the callee and return its result.
  %res = call [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1>] %arg2, [2 x <vscale x 4 x i1>] %arg1)
  ret [2 x <vscale x 4 x i1>] %res
}

; fp128 comparisons lower to libcalls (__lttf2/__getf2), which in a
; streaming-compatible function are wrapped in __arm_sme_state queries with
; conditional smstop/smstart around each call.  The q-register spills of the
; fp128 arguments must live in the FPR area of the frame: with hazard padding
; (CHECK64/CHECK1024) they are placed away from the GPR callee-saves, and for
; CHECK1024 a separate #1088 local area is allocated below the padded
; callee-save region before the spills.
define i32 @f128_libcall(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3, i32 %a, i32 %b) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: f128_libcall:
; CHECK0:       // %bb.0:
; CHECK0-NEXT:    sub sp, sp, #176
; CHECK0-NEXT:    .cfi_def_cfa_offset 176
; CHECK0-NEXT:    cntd x9
; CHECK0-NEXT:    stp d15, d14, [sp, #64] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d13, d12, [sp, #80] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d11, d10, [sp, #96] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d9, d8, [sp, #112] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x30, x9, [sp, #128] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_offset w19, -8
; CHECK0-NEXT:    .cfi_offset w20, -16
; CHECK0-NEXT:    .cfi_offset w21, -24
; CHECK0-NEXT:    .cfi_offset w22, -32
; CHECK0-NEXT:    .cfi_offset w30, -48
; CHECK0-NEXT:    .cfi_offset b8, -56
; CHECK0-NEXT:    .cfi_offset b9, -64
; CHECK0-NEXT:    .cfi_offset b10, -72
; CHECK0-NEXT:    .cfi_offset b11, -80
; CHECK0-NEXT:    .cfi_offset b12, -88
; CHECK0-NEXT:    .cfi_offset b13, -96
; CHECK0-NEXT:    .cfi_offset b14, -104
; CHECK0-NEXT:    .cfi_offset b15, -112
; CHECK0-NEXT:    mov w19, w1
; CHECK0-NEXT:    mov w20, w0
; CHECK0-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK0-NEXT:    stp q2, q3, [sp, #32] // 32-byte Folded Spill
; CHECK0-NEXT:    bl __arm_sme_state
; CHECK0-NEXT:    and x21, x0, #0x1
; CHECK0-NEXT:    .cfi_offset vg, -40
; CHECK0-NEXT:    tbz w21, #0, .LBB27_2
; CHECK0-NEXT:  // %bb.1:
; CHECK0-NEXT:    smstop sm
; CHECK0-NEXT:  .LBB27_2:
; CHECK0-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK0-NEXT:    bl __lttf2
; CHECK0-NEXT:    tbz w21, #0, .LBB27_4
; CHECK0-NEXT:  // %bb.3:
; CHECK0-NEXT:    smstart sm
; CHECK0-NEXT:  .LBB27_4:
; CHECK0-NEXT:    cmp w0, #0
; CHECK0-NEXT:    .cfi_restore vg
; CHECK0-NEXT:    cset w21, lt
; CHECK0-NEXT:    bl __arm_sme_state
; CHECK0-NEXT:    and x22, x0, #0x1
; CHECK0-NEXT:    .cfi_offset vg, -40
; CHECK0-NEXT:    tbz w22, #0, .LBB27_6
; CHECK0-NEXT:  // %bb.5:
; CHECK0-NEXT:    smstop sm
; CHECK0-NEXT:  .LBB27_6:
; CHECK0-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
; CHECK0-NEXT:    bl __getf2
; CHECK0-NEXT:    tbz w22, #0, .LBB27_8
; CHECK0-NEXT:  // %bb.7:
; CHECK0-NEXT:    smstart sm
; CHECK0-NEXT:  .LBB27_8:
; CHECK0-NEXT:    cmp w0, #0
; CHECK0-NEXT:    cset w8, ge
; CHECK0-NEXT:    tst w8, w21
; CHECK0-NEXT:    csel w0, w20, w19, ne
; CHECK0-NEXT:    .cfi_restore vg
; CHECK0-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr x30, [sp, #128] // 8-byte Folded Reload
; CHECK0-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d9, d8, [sp, #112] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d11, d10, [sp, #96] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d13, d12, [sp, #80] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d15, d14, [sp, #64] // 16-byte Folded Reload
; CHECK0-NEXT:    add sp, sp, #176
; CHECK0-NEXT:    .cfi_def_cfa_offset 0
; CHECK0-NEXT:    .cfi_restore w19
; CHECK0-NEXT:    .cfi_restore w20
; CHECK0-NEXT:    .cfi_restore w21
; CHECK0-NEXT:    .cfi_restore w22
; CHECK0-NEXT:    .cfi_restore w30
; CHECK0-NEXT:    .cfi_restore b8
; CHECK0-NEXT:    .cfi_restore b9
; CHECK0-NEXT:    .cfi_restore b10
; CHECK0-NEXT:    .cfi_restore b11
; CHECK0-NEXT:    .cfi_restore b12
; CHECK0-NEXT:    .cfi_restore b13
; CHECK0-NEXT:    .cfi_restore b14
; CHECK0-NEXT:    .cfi_restore b15
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: f128_libcall:
; CHECK64:       // %bb.0:
; CHECK64-NEXT:    sub sp, sp, #320
; CHECK64-NEXT:    .cfi_def_cfa_offset 320
; CHECK64-NEXT:    cntd x9
; CHECK64-NEXT:    stp d15, d14, [sp, #128] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d13, d12, [sp, #144] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d11, d10, [sp, #160] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d9, d8, [sp, #176] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x9, x22, [sp, #272] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x21, x20, [sp, #288] // 16-byte Folded Spill
; CHECK64-NEXT:    str x19, [sp, #304] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_offset w19, -16
; CHECK64-NEXT:    .cfi_offset w20, -24
; CHECK64-NEXT:    .cfi_offset w21, -32
; CHECK64-NEXT:    .cfi_offset w22, -40
; CHECK64-NEXT:    .cfi_offset w30, -56
; CHECK64-NEXT:    .cfi_offset w29, -64
; CHECK64-NEXT:    .cfi_offset b8, -136
; CHECK64-NEXT:    .cfi_offset b9, -144
; CHECK64-NEXT:    .cfi_offset b10, -152
; CHECK64-NEXT:    .cfi_offset b11, -160
; CHECK64-NEXT:    .cfi_offset b12, -168
; CHECK64-NEXT:    .cfi_offset b13, -176
; CHECK64-NEXT:    .cfi_offset b14, -184
; CHECK64-NEXT:    .cfi_offset b15, -192
; CHECK64-NEXT:    mov w19, w1
; CHECK64-NEXT:    mov w20, w0
; CHECK64-NEXT:    stp q0, q1, [sp, #64] // 32-byte Folded Spill
; CHECK64-NEXT:    stp q2, q3, [sp, #96] // 32-byte Folded Spill
; CHECK64-NEXT:    bl __arm_sme_state
; CHECK64-NEXT:    and x21, x0, #0x1
; CHECK64-NEXT:    .cfi_offset vg, -48
; CHECK64-NEXT:    tbz w21, #0, .LBB27_2
; CHECK64-NEXT:  // %bb.1:
; CHECK64-NEXT:    smstop sm
; CHECK64-NEXT:  .LBB27_2:
; CHECK64-NEXT:    ldp q0, q1, [sp, #64] // 32-byte Folded Reload
; CHECK64-NEXT:    bl __lttf2
; CHECK64-NEXT:    tbz w21, #0, .LBB27_4
; CHECK64-NEXT:  // %bb.3:
; CHECK64-NEXT:    smstart sm
; CHECK64-NEXT:  .LBB27_4:
; CHECK64-NEXT:    cmp w0, #0
; CHECK64-NEXT:    .cfi_restore vg
; CHECK64-NEXT:    cset w21, lt
; CHECK64-NEXT:    bl __arm_sme_state
; CHECK64-NEXT:    and x22, x0, #0x1
; CHECK64-NEXT:    .cfi_offset vg, -48
; CHECK64-NEXT:    tbz w22, #0, .LBB27_6
; CHECK64-NEXT:  // %bb.5:
; CHECK64-NEXT:    smstop sm
; CHECK64-NEXT:  .LBB27_6:
; CHECK64-NEXT:    ldp q0, q1, [sp, #96] // 32-byte Folded Reload
; CHECK64-NEXT:    bl __getf2
; CHECK64-NEXT:    tbz w22, #0, .LBB27_8
; CHECK64-NEXT:  // %bb.7:
; CHECK64-NEXT:    smstart sm
; CHECK64-NEXT:  .LBB27_8:
; CHECK64-NEXT:    cmp w0, #0
; CHECK64-NEXT:    cset w8, ge
; CHECK64-NEXT:    tst w8, w21
; CHECK64-NEXT:    csel w0, w20, w19, ne
; CHECK64-NEXT:    .cfi_restore vg
; CHECK64-NEXT:    ldp x20, x19, [sp, #296] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp x22, x21, [sp, #280] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp x29, x30, [sp, #256] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d9, d8, [sp, #176] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d11, d10, [sp, #160] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d13, d12, [sp, #144] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d15, d14, [sp, #128] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #320
; CHECK64-NEXT:    .cfi_def_cfa_offset 0
; CHECK64-NEXT:    .cfi_restore w19
; CHECK64-NEXT:    .cfi_restore w20
; CHECK64-NEXT:    .cfi_restore w21
; CHECK64-NEXT:    .cfi_restore w22
; CHECK64-NEXT:    .cfi_restore w30
; CHECK64-NEXT:    .cfi_restore w29
; CHECK64-NEXT:    .cfi_restore b8
; CHECK64-NEXT:    .cfi_restore b9
; CHECK64-NEXT:    .cfi_restore b10
; CHECK64-NEXT:    .cfi_restore b11
; CHECK64-NEXT:    .cfi_restore b12
; CHECK64-NEXT:    .cfi_restore b13
; CHECK64-NEXT:    .cfi_restore b14
; CHECK64-NEXT:    .cfi_restore b15
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: f128_libcall:
; CHECK1024:       // %bb.0:
; CHECK1024-NEXT:    sub sp, sp, #1152
; CHECK1024-NEXT:    .cfi_def_cfa_offset 1152
; CHECK1024-NEXT:    cntd x9
; CHECK1024-NEXT:    stp d15, d14, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1088] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x30, [sp, #1096] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x9, [sp, #1104] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x22, [sp, #1112] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x21, [sp, #1120] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x20, [sp, #1128] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x19, [sp, #1136] // 8-byte Folded Spill
; CHECK1024-NEXT:    .cfi_offset w19, -16
; CHECK1024-NEXT:    .cfi_offset w20, -24
; CHECK1024-NEXT:    .cfi_offset w21, -32
; CHECK1024-NEXT:    .cfi_offset w22, -40
; CHECK1024-NEXT:    .cfi_offset w30, -56
; CHECK1024-NEXT:    .cfi_offset w29, -64
; CHECK1024-NEXT:    .cfi_offset b8, -1096
; CHECK1024-NEXT:    .cfi_offset b9, -1104
; CHECK1024-NEXT:    .cfi_offset b10, -1112
; CHECK1024-NEXT:    .cfi_offset b11, -1120
; CHECK1024-NEXT:    .cfi_offset b12, -1128
; CHECK1024-NEXT:    .cfi_offset b13, -1136
; CHECK1024-NEXT:    .cfi_offset b14, -1144
; CHECK1024-NEXT:    .cfi_offset b15, -1152
; CHECK1024-NEXT:    sub sp, sp, #1088
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2240
; CHECK1024-NEXT:    mov w19, w1
; CHECK1024-NEXT:    mov w20, w0
; CHECK1024-NEXT:    str q3, [sp, #1072] // 16-byte Folded Spill
; CHECK1024-NEXT:    str q2, [sp, #1056] // 16-byte Folded Spill
; CHECK1024-NEXT:    str q1, [sp, #1040] // 16-byte Folded Spill
; CHECK1024-NEXT:    str q0, [sp, #1024] // 16-byte Folded Spill
; CHECK1024-NEXT:    bl __arm_sme_state
; CHECK1024-NEXT:    and x21, x0, #0x1
; CHECK1024-NEXT:    .cfi_offset vg, -48
; CHECK1024-NEXT:    tbz w21, #0, .LBB27_2
; CHECK1024-NEXT:  // %bb.1:
; CHECK1024-NEXT:    smstop sm
; CHECK1024-NEXT:  .LBB27_2:
; CHECK1024-NEXT:    ldr q0, [sp, #1024] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr q1, [sp, #1040] // 16-byte Folded Reload
; CHECK1024-NEXT:    bl __lttf2
; CHECK1024-NEXT:    tbz w21, #0, .LBB27_4
; CHECK1024-NEXT:  // %bb.3:
; CHECK1024-NEXT:    smstart sm
; CHECK1024-NEXT:  .LBB27_4:
; CHECK1024-NEXT:    cmp w0, #0
; CHECK1024-NEXT:    .cfi_restore vg
; CHECK1024-NEXT:    cset w21, lt
; CHECK1024-NEXT:    bl __arm_sme_state
; CHECK1024-NEXT:    and x22, x0, #0x1
; CHECK1024-NEXT:    .cfi_offset vg, -48
; CHECK1024-NEXT:    tbz w22, #0, .LBB27_6
; CHECK1024-NEXT:  // %bb.5:
; CHECK1024-NEXT:    smstop sm
; CHECK1024-NEXT:  .LBB27_6:
; CHECK1024-NEXT:    ldr q0, [sp, #1056] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr q1, [sp, #1072] // 16-byte Folded Reload
; CHECK1024-NEXT:    bl __getf2
; CHECK1024-NEXT:    tbz w22, #0, .LBB27_8
; CHECK1024-NEXT:  // %bb.7:
; CHECK1024-NEXT:    smstart sm
; CHECK1024-NEXT:  .LBB27_8:
; CHECK1024-NEXT:    cmp w0, #0
; CHECK1024-NEXT:    cset w8, ge
; CHECK1024-NEXT:    tst w8, w21
; CHECK1024-NEXT:    csel w0, w20, w19, ne
; CHECK1024-NEXT:    .cfi_restore vg
; CHECK1024-NEXT:    add sp, sp, #1088
; CHECK1024-NEXT:    .cfi_def_cfa_offset 1152
; CHECK1024-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x19, [sp, #1136] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x20, [sp, #1128] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x21, [sp, #1120] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x22, [sp, #1112] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x30, [sp, #1096] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1088] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldp d15, d14, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1152
; CHECK1024-NEXT:    .cfi_def_cfa_offset 0
; CHECK1024-NEXT:    .cfi_restore w19
; CHECK1024-NEXT:    .cfi_restore w20
; CHECK1024-NEXT:    .cfi_restore w21
; CHECK1024-NEXT:    .cfi_restore w22
; CHECK1024-NEXT:    .cfi_restore w30
; CHECK1024-NEXT:    .cfi_restore w29
; CHECK1024-NEXT:    .cfi_restore b8
; CHECK1024-NEXT:    .cfi_restore b9
; CHECK1024-NEXT:    .cfi_restore b10
; CHECK1024-NEXT:    .cfi_restore b11
; CHECK1024-NEXT:    .cfi_restore b12
; CHECK1024-NEXT:    .cfi_restore b13
; CHECK1024-NEXT:    .cfi_restore b14
; CHECK1024-NEXT:    .cfi_restore b15
; CHECK1024-NEXT:    ret
  %c0 = fcmp olt fp128 %v0, %v1 ; lowered to a __lttf2 libcall
  %c1 = fcmp oge fp128 %v2, %v3 ; lowered to a __getf2 libcall
  %cr = and i1 %c1, %c0
  %sel = select i1 %cr, i32 %a, i32 %b
  ret i32 %sel
}

define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_call:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 48
; CHECK0-NEXT:    cntd x9
; CHECK0-NEXT:    stp x9, x28, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x27, x19, [sp, #32] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_offset w19, -8
; CHECK0-NEXT:    .cfi_offset w27, -16
; CHECK0-NEXT:    .cfi_offset w28, -24
; CHECK0-NEXT:    .cfi_offset w30, -40
; CHECK0-NEXT:    .cfi_offset w29, -48
; CHECK0-NEXT:    addvl sp, sp, #-18
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 144 * VG
; CHECK0-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG
; CHECK0-NEXT:    mov x8, x0
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    bl __arm_sme_state
; CHECK0-NEXT:    and x19, x0, #0x1
; CHECK0-NEXT:    .cfi_offset vg, -32
; CHECK0-NEXT:    tbz w19, #0, .LBB28_2
; CHECK0-NEXT:  // %bb.1: // %entry
; CHECK0-NEXT:    smstop sm
; CHECK0-NEXT:  .LBB28_2: // %entry
; CHECK0-NEXT:    mov x0, x8
; CHECK0-NEXT:    mov w1, #45 // =0x2d
; CHECK0-NEXT:    mov w2, #37 // =0x25
; CHECK0-NEXT:    bl memset
; CHECK0-NEXT:    tbz w19, #0, .LBB28_4
; CHECK0-NEXT:  // %bb.3: // %entry
; CHECK0-NEXT:    smstart sm
; CHECK0-NEXT:  .LBB28_4: // %entry
; CHECK0-NEXT:    mov w0, #22647 // =0x5877
; CHECK0-NEXT:    movk w0, #59491, lsl #16
; CHECK0-NEXT:    .cfi_restore vg
; CHECK0-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    addvl sp, sp, #18
; CHECK0-NEXT:    .cfi_def_cfa wsp, 48
; CHECK0-NEXT:    .cfi_restore z8
; CHECK0-NEXT:    .cfi_restore z9
; CHECK0-NEXT:    .cfi_restore z10
; CHECK0-NEXT:    .cfi_restore z11
; CHECK0-NEXT:    .cfi_restore z12
; CHECK0-NEXT:    .cfi_restore z13
; CHECK0-NEXT:    .cfi_restore z14
; CHECK0-NEXT:    .cfi_restore z15
; CHECK0-NEXT:    ldp x27, x19, [sp, #32] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr x28, [sp, #24] // 8-byte Folded Reload
; CHECK0-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
; CHECK0-NEXT:    .cfi_def_cfa_offset 0
; CHECK0-NEXT:    .cfi_restore w19
; CHECK0-NEXT:    .cfi_restore w27
; CHECK0-NEXT:    .cfi_restore w28
; CHECK0-NEXT:    .cfi_restore w30
; CHECK0-NEXT:    .cfi_restore w29
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_call:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #112
; CHECK64-NEXT:    .cfi_def_cfa_offset 112
; CHECK64-NEXT:    cntd x9
; CHECK64-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x9, x28, [sp, #80] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x27, x19, [sp, #96] // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_offset w19, -8
; CHECK64-NEXT:    .cfi_offset w27, -16
; CHECK64-NEXT:    .cfi_offset w28, -24
; CHECK64-NEXT:    .cfi_offset w30, -40
; CHECK64-NEXT:    .cfi_offset w29, -48
; CHECK64-NEXT:    addvl sp, sp, #-18
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG
; CHECK64-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 112 - 8 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 112 - 16 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 112 - 24 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 112 - 32 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 112 - 40 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 112 - 48 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 112 - 56 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 112 - 64 * VG
; CHECK64-NEXT:    sub sp, sp, #64
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x01, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 176 + 144 * VG
; CHECK64-NEXT:    mov x8, x0
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    bl __arm_sme_state
; CHECK64-NEXT:    and x19, x0, #0x1
; CHECK64-NEXT:    .cfi_offset vg, -32
; CHECK64-NEXT:    tbz w19, #0, .LBB28_2
; CHECK64-NEXT:  // %bb.1: // %entry
; CHECK64-NEXT:    smstop sm
; CHECK64-NEXT:  .LBB28_2: // %entry
; CHECK64-NEXT:    mov x0, x8
; CHECK64-NEXT:    mov w1, #45 // =0x2d
; CHECK64-NEXT:    mov w2, #37 // =0x25
; CHECK64-NEXT:    bl memset
; CHECK64-NEXT:    tbz w19, #0, .LBB28_4
; CHECK64-NEXT:  // %bb.3: // %entry
; CHECK64-NEXT:    smstart sm
; CHECK64-NEXT:  .LBB28_4: // %entry
; CHECK64-NEXT:    mov w0, #22647 // =0x5877
; CHECK64-NEXT:    movk w0, #59491, lsl #16
; CHECK64-NEXT:    .cfi_restore vg
; CHECK64-NEXT:    add sp, sp, #64
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG
; CHECK64-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #18
; CHECK64-NEXT:    .cfi_def_cfa wsp, 112
; CHECK64-NEXT:    .cfi_restore z8
; CHECK64-NEXT:    .cfi_restore z9
; CHECK64-NEXT:    .cfi_restore z10
; CHECK64-NEXT:    .cfi_restore z11
; CHECK64-NEXT:    .cfi_restore z12
; CHECK64-NEXT:    .cfi_restore z13
; CHECK64-NEXT:    .cfi_restore z14
; CHECK64-NEXT:    .cfi_restore z15
; CHECK64-NEXT:    ldp x27, x19, [sp, #96] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr x28, [sp, #88] // 8-byte Folded Reload
; CHECK64-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #112
; CHECK64-NEXT:    .cfi_def_cfa_offset 0
; CHECK64-NEXT:    .cfi_restore w19
; CHECK64-NEXT:    .cfi_restore w27
; CHECK64-NEXT:    .cfi_restore w28
; CHECK64-NEXT:    .cfi_restore w30
; CHECK64-NEXT:    .cfi_restore w29
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_call:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1072
; CHECK1024-NEXT:    .cfi_def_cfa_offset 1072
; CHECK1024-NEXT:    cntd x9
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x30, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x9, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x28, [sp, #1048] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x27, [sp, #1056] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x19, [sp, #1064] // 8-byte Folded Spill
; CHECK1024-NEXT:    .cfi_offset w19, -8
; CHECK1024-NEXT:    .cfi_offset w27, -16
; CHECK1024-NEXT:    .cfi_offset w28, -24
; CHECK1024-NEXT:    .cfi_offset w30, -40
; CHECK1024-NEXT:    .cfi_offset w29, -48
; CHECK1024-NEXT:    addvl sp, sp, #-18
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG
; CHECK1024-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1072 - 8 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1072 - 16 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1072 - 24 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1072 - 32 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1072 - 40 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1072 - 48 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1072 - 56 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1072 - 64 * VG
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2096 + 144 * VG
; CHECK1024-NEXT:    mov x8, x0
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    bl __arm_sme_state
; CHECK1024-NEXT:    and x19, x0, #0x1
; CHECK1024-NEXT:    .cfi_offset vg, -32
; CHECK1024-NEXT:    tbz w19, #0, .LBB28_2
; CHECK1024-NEXT:  // %bb.1: // %entry
; CHECK1024-NEXT:    smstop sm
; CHECK1024-NEXT:  .LBB28_2: // %entry
; CHECK1024-NEXT:    mov x0, x8
; CHECK1024-NEXT:    mov w1, #45 // =0x2d
; CHECK1024-NEXT:    mov w2, #37 // =0x25
; CHECK1024-NEXT:    bl memset
; CHECK1024-NEXT:    tbz w19, #0, .LBB28_4
; CHECK1024-NEXT:  // %bb.3: // %entry
; CHECK1024-NEXT:    smstart sm
; CHECK1024-NEXT:  .LBB28_4: // %entry
; CHECK1024-NEXT:    mov w0, #22647 // =0x5877
; CHECK1024-NEXT:    movk w0, #59491, lsl #16
; CHECK1024-NEXT:    .cfi_restore vg
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG
; CHECK1024-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #18
; CHECK1024-NEXT:    .cfi_def_cfa wsp, 1072
; CHECK1024-NEXT:    .cfi_restore z8
; CHECK1024-NEXT:    .cfi_restore z9
; CHECK1024-NEXT:    .cfi_restore z10
; CHECK1024-NEXT:    .cfi_restore z11
; CHECK1024-NEXT:    .cfi_restore z12
; CHECK1024-NEXT:    .cfi_restore z13
; CHECK1024-NEXT:    .cfi_restore z14
; CHECK1024-NEXT:    .cfi_restore z15
; CHECK1024-NEXT:    ldr x19, [sp, #1064] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x27, [sp, #1056] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x28, [sp, #1048] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x30, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1072
; CHECK1024-NEXT:    .cfi_def_cfa_offset 0
; CHECK1024-NEXT:    .cfi_restore w19
; CHECK1024-NEXT:    .cfi_restore w27
; CHECK1024-NEXT:    .cfi_restore w28
; CHECK1024-NEXT:    .cfi_restore w30
; CHECK1024-NEXT:    .cfi_restore w29
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
  %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
  ret i32 -396142473
}

define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_alloca_call:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 48
; CHECK0-NEXT:    cntd x9
; CHECK0-NEXT:    stp x9, x28, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x27, x19, [sp, #32] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_offset w19, -8
; CHECK0-NEXT:    .cfi_offset w27, -16
; CHECK0-NEXT:    .cfi_offset w28, -24
; CHECK0-NEXT:    .cfi_offset w30, -40
; CHECK0-NEXT:    .cfi_offset w29, -48
; CHECK0-NEXT:    addvl sp, sp, #-18
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 144 * VG
; CHECK0-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK0-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG
; CHECK0-NEXT:    sub sp, sp, #48
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 96 + 144 * VG
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    bl __arm_sme_state
; CHECK0-NEXT:    and x19, x0, #0x1
; CHECK0-NEXT:    .cfi_offset vg, -32
; CHECK0-NEXT:    tbz w19, #0, .LBB29_2
; CHECK0-NEXT:  // %bb.1: // %entry
; CHECK0-NEXT:    smstop sm
; CHECK0-NEXT:  .LBB29_2: // %entry
; CHECK0-NEXT:    mov x0, sp
; CHECK0-NEXT:    mov w1, #45 // =0x2d
; CHECK0-NEXT:    mov w2, #37 // =0x25
; CHECK0-NEXT:    bl memset
; CHECK0-NEXT:    tbz w19, #0, .LBB29_4
; CHECK0-NEXT:  // %bb.3: // %entry
; CHECK0-NEXT:    smstart sm
; CHECK0-NEXT:  .LBB29_4: // %entry
; CHECK0-NEXT:    mov w0, #22647 // =0x5877
; CHECK0-NEXT:    movk w0, #59491, lsl #16
; CHECK0-NEXT:    .cfi_restore vg
; CHECK0-NEXT:    add sp, sp, #48
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 144 * VG
; CHECK0-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK0-NEXT:    addvl sp, sp, #18
; CHECK0-NEXT:    .cfi_def_cfa wsp, 48
; CHECK0-NEXT:    .cfi_restore z8
; CHECK0-NEXT:    .cfi_restore z9
; CHECK0-NEXT:    .cfi_restore z10
; CHECK0-NEXT:    .cfi_restore z11
; CHECK0-NEXT:    .cfi_restore z12
; CHECK0-NEXT:    .cfi_restore z13
; CHECK0-NEXT:    .cfi_restore z14
; CHECK0-NEXT:    .cfi_restore z15
; CHECK0-NEXT:    ldp x27, x19, [sp, #32] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr x28, [sp, #24] // 8-byte Folded Reload
; CHECK0-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
; CHECK0-NEXT:    .cfi_def_cfa_offset 0
; CHECK0-NEXT:    .cfi_restore w19
; CHECK0-NEXT:    .cfi_restore w27
; CHECK0-NEXT:    .cfi_restore w28
; CHECK0-NEXT:    .cfi_restore w30
; CHECK0-NEXT:    .cfi_restore w29
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_alloca_call:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #112
; CHECK64-NEXT:    .cfi_def_cfa_offset 112
; CHECK64-NEXT:    cntd x9
; CHECK64-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x9, x28, [sp, #80] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x27, x19, [sp, #96] // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_offset w19, -8
; CHECK64-NEXT:    .cfi_offset w27, -16
; CHECK64-NEXT:    .cfi_offset w28, -24
; CHECK64-NEXT:    .cfi_offset w30, -40
; CHECK64-NEXT:    .cfi_offset w29, -48
; CHECK64-NEXT:    addvl sp, sp, #-18
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG
; CHECK64-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK64-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 112 - 8 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 112 - 16 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 112 - 24 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 112 - 32 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 112 - 40 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 112 - 48 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 112 - 56 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 112 - 64 * VG
; CHECK64-NEXT:    sub sp, sp, #112
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x01, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 224 + 144 * VG
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    bl __arm_sme_state
; CHECK64-NEXT:    and x19, x0, #0x1
; CHECK64-NEXT:    .cfi_offset vg, -32
; CHECK64-NEXT:    tbz w19, #0, .LBB29_2
; CHECK64-NEXT:  // %bb.1: // %entry
; CHECK64-NEXT:    smstop sm
; CHECK64-NEXT:  .LBB29_2: // %entry
; CHECK64-NEXT:    mov x0, sp
; CHECK64-NEXT:    mov w1, #45 // =0x2d
; CHECK64-NEXT:    mov w2, #37 // =0x25
; CHECK64-NEXT:    bl memset
; CHECK64-NEXT:    tbz w19, #0, .LBB29_4
; CHECK64-NEXT:  // %bb.3: // %entry
; CHECK64-NEXT:    smstart sm
; CHECK64-NEXT:  .LBB29_4: // %entry
; CHECK64-NEXT:    mov w0, #22647 // =0x5877
; CHECK64-NEXT:    movk w0, #59491, lsl #16
; CHECK64-NEXT:    .cfi_restore vg
; CHECK64-NEXT:    add sp, sp, #112
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG
; CHECK64-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #18
; CHECK64-NEXT:    .cfi_def_cfa wsp, 112
; CHECK64-NEXT:    .cfi_restore z8
; CHECK64-NEXT:    .cfi_restore z9
; CHECK64-NEXT:    .cfi_restore z10
; CHECK64-NEXT:    .cfi_restore z11
; CHECK64-NEXT:    .cfi_restore z12
; CHECK64-NEXT:    .cfi_restore z13
; CHECK64-NEXT:    .cfi_restore z14
; CHECK64-NEXT:    .cfi_restore z15
; CHECK64-NEXT:    ldp x27, x19, [sp, #96] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr x28, [sp, #88] // 8-byte Folded Reload
; CHECK64-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #112
; CHECK64-NEXT:    .cfi_def_cfa_offset 0
; CHECK64-NEXT:    .cfi_restore w19
; CHECK64-NEXT:    .cfi_restore w27
; CHECK64-NEXT:    .cfi_restore w28
; CHECK64-NEXT:    .cfi_restore w30
; CHECK64-NEXT:    .cfi_restore w29
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_alloca_call:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1072
; CHECK1024-NEXT:    .cfi_def_cfa_offset 1072
; CHECK1024-NEXT:    cntd x9
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x30, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x9, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x28, [sp, #1048] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x27, [sp, #1056] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x19, [sp, #1064] // 8-byte Folded Spill
; CHECK1024-NEXT:    .cfi_offset w19, -8
; CHECK1024-NEXT:    .cfi_offset w27, -16
; CHECK1024-NEXT:    .cfi_offset w28, -24
; CHECK1024-NEXT:    .cfi_offset w30, -40
; CHECK1024-NEXT:    .cfi_offset w29, -48
; CHECK1024-NEXT:    addvl sp, sp, #-18
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG
; CHECK1024-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK1024-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1072 - 8 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1072 - 16 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1072 - 24 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1072 - 32 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1072 - 40 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1072 - 48 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1072 - 56 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1072 - 64 * VG
; CHECK1024-NEXT:    sub sp, sp, #1072
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2144 + 144 * VG
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    bl __arm_sme_state
; CHECK1024-NEXT:    and x19, x0, #0x1
; CHECK1024-NEXT:    .cfi_offset vg, -32
; CHECK1024-NEXT:    tbz w19, #0, .LBB29_2
; CHECK1024-NEXT:  // %bb.1: // %entry
; CHECK1024-NEXT:    smstop sm
; CHECK1024-NEXT:  .LBB29_2: // %entry
; CHECK1024-NEXT:    mov x0, sp
; CHECK1024-NEXT:    mov w1, #45 // =0x2d
; CHECK1024-NEXT:    mov w2, #37 // =0x25
; CHECK1024-NEXT:    bl memset
; CHECK1024-NEXT:    tbz w19, #0, .LBB29_4
; CHECK1024-NEXT:  // %bb.3: // %entry
; CHECK1024-NEXT:    smstart sm
; CHECK1024-NEXT:  .LBB29_4: // %entry
; CHECK1024-NEXT:    mov w0, #22647 // =0x5877
; CHECK1024-NEXT:    movk w0, #59491, lsl #16
; CHECK1024-NEXT:    .cfi_restore vg
; CHECK1024-NEXT:    add sp, sp, #1072
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG
; CHECK1024-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #18
; CHECK1024-NEXT:    .cfi_def_cfa wsp, 1072
; CHECK1024-NEXT:    .cfi_restore z8
; CHECK1024-NEXT:    .cfi_restore z9
; CHECK1024-NEXT:    .cfi_restore z10
; CHECK1024-NEXT:    .cfi_restore z11
; CHECK1024-NEXT:    .cfi_restore z12
; CHECK1024-NEXT:    .cfi_restore z13
; CHECK1024-NEXT:    .cfi_restore z14
; CHECK1024-NEXT:    .cfi_restore z15
; CHECK1024-NEXT:    ldr x19, [sp, #1064] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x27, [sp, #1056] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x28, [sp, #1048] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x30, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1072
; CHECK1024-NEXT:    .cfi_def_cfa_offset 0
; CHECK1024-NEXT:    .cfi_restore w19
; CHECK1024-NEXT:    .cfi_restore w27
; CHECK1024-NEXT:    .cfi_restore w28
; CHECK1024-NEXT:    .cfi_restore w30
; CHECK1024-NEXT:    .cfi_restore w29
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
  %0 = alloca [37 x i8], align 16
  %call = call ptr @memset(ptr noundef nonnull %0, i32 noundef 45, i32 noundef 37)
  ret i32 -396142473
}
declare ptr @memset(ptr, i32, i32)

; The double constant is reused after the first call, so it must live in a
; callee-saved FPR (d8) across `bl calld`. With hazard padding enabled
; (CHECK64/CHECK1024) the d8 spill slot is separated from the GPR spills
; (x29/x30) by the configured hazard size.
define void @call_with_doubles() "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: call_with_doubles:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset w30, -8
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    mov x8, #9221120237041090560 // =0x7ff8000000000000
; CHECK0-NEXT:    fmov d8, x8
; CHECK0-NEXT:    fmov d0, d8
; CHECK0-NEXT:    bl calld
; CHECK0-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK0-NEXT:    fmov d0, d8
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    b calld
;
; CHECK64-LABEL: call_with_doubles:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #144
; CHECK64-NEXT:    str d8, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    str x30, [sp, #136] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 144
; CHECK64-NEXT:    .cfi_offset w30, -8
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    mov x8, #9221120237041090560 // =0x7ff8000000000000
; CHECK64-NEXT:    fmov d8, x8
; CHECK64-NEXT:    fmov d0, d8
; CHECK64-NEXT:    bl calld
; CHECK64-NEXT:    fmov d0, d8
; CHECK64-NEXT:    ldr x30, [sp, #136] // 8-byte Folded Reload
; CHECK64-NEXT:    ldr d8, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #144
; CHECK64-NEXT:    b calld
;
; CHECK1024-LABEL: call_with_doubles:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1056
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x30, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
; CHECK1024-NEXT:    .cfi_offset w30, -16
; CHECK1024-NEXT:    .cfi_offset w29, -24
; CHECK1024-NEXT:    .cfi_offset b8, -1056
; CHECK1024-NEXT:    mov x8, #9221120237041090560 // =0x7ff8000000000000
; CHECK1024-NEXT:    fmov d8, x8
; CHECK1024-NEXT:    fmov d0, d8
; CHECK1024-NEXT:    bl calld
; CHECK1024-NEXT:    fmov d0, d8
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr x30, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1056
; CHECK1024-NEXT:    b calld
entry:
  ; Second call is tail-callable (`b calld` in the checks above).
  %call = tail call i32 @calld(double 0x7FF8000000000000)
  %call.1 = tail call i32 @calld(double 0x7FF8000000000000)
  ret void
}
declare i32 @calld(double) "aarch64_pstate_sm_compatible"

; Check that stack objects are ordered fpr > hazard > gpr
define void @ordering_test(double %d, half %h, <4 x i32> %v) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: ordering_test:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    sub sp, sp, #48
; CHECK0-NEXT:    .cfi_def_cfa_offset 48
; CHECK0-NEXT:    str wzr, [sp, #32]
; CHECK0-NEXT:    str d0, [sp, #24]
; CHECK0-NEXT:    str wzr, [sp, #44]
; CHECK0-NEXT:    str h1, [sp, #22]
; CHECK0-NEXT:    str wzr, [sp, #16]
; CHECK0-NEXT:    str q2, [sp], #48
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: ordering_test:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #128
; CHECK64-NEXT:    .cfi_def_cfa_offset 128
; CHECK64-NEXT:    stp wzr, wzr, [sp, #12]
; CHECK64-NEXT:    str d0, [sp, #120]
; CHECK64-NEXT:    str wzr, [sp, #28]
; CHECK64-NEXT:    str h1, [sp, #118]
; CHECK64-NEXT:    str q2, [sp, #96]
; CHECK64-NEXT:    add sp, sp, #128
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: ordering_test:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1088
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2128
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    stp wzr, wzr, [sp, #12]
; CHECK1024-NEXT:    str d0, [sp, #1080]
; CHECK1024-NEXT:    str wzr, [sp, #28]
; CHECK1024-NEXT:    str h1, [sp, #1078]
; CHECK1024-NEXT:    str q2, [sp, #1056]
; CHECK1024-NEXT:    add sp, sp, #1056
entry:
  ; Interleaved GPR-typed (i32/i64) and FPR-typed (f64/f16/v4i32) locals; the
  ; CHECK64/CHECK1024 store offsets show the FPR-accessed objects grouped on
  ; the far side of the hazard padding from the GPR-accessed ones.
  %i32 = alloca i32
  %i64 = alloca i64
  %f64 = alloca double
  %f16 = alloca half
  %i32b = alloca i32
  %v4i32 = alloca <4 x i32>
  store i32 0, ptr %i64
  store double %d, ptr %f64
  store i32 0, ptr %i32
  store half %h, ptr %f16
  store i32 0, ptr %i32b
  store <4 x i32> %v, ptr %v4i32
  ret void
}


; Hazard-aware layout for stack arrays: the i32 array (GPR-accessed) and the
; float array (FPR-accessed) are placed with the hazard padding between their
; bases — compare the base offsets in the CHECK64/CHECK1024 lines below.
define void @ordering_test_array(i64 %o, i64 %p, float %f, i32 %x) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: ordering_test_array:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    sub sp, sp, #272
; CHECK0-NEXT:    str x29, [sp, #256] // 8-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 272
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    add x8, sp, #128
; CHECK0-NEXT:    str w2, [x8, x0, lsl #2]
; CHECK0-NEXT:    mov x8, sp
; CHECK0-NEXT:    str s0, [x8, x1, lsl #2]
; CHECK0-NEXT:    add sp, sp, #272
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: ordering_test_array:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #400
; CHECK64-NEXT:    str x29, [sp, #384] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 400
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    mov x8, sp
; CHECK64-NEXT:    str w2, [x8, x0, lsl #2]
; CHECK64-NEXT:    add x8, sp, #192
; CHECK64-NEXT:    str s0, [x8, x1, lsl #2]
; CHECK64-NEXT:    add sp, sp, #400
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: ordering_test_array:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1280
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2320
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    mov x8, sp
; CHECK1024-NEXT:    str w2, [x8, x0, lsl #2]
; CHECK1024-NEXT:    add x8, sp, #1152
; CHECK1024-NEXT:    str s0, [x8, x1, lsl #2]
; CHECK1024-NEXT:    add sp, sp, #1280
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  ; Runtime-indexed accesses keep the stores relative to each array's base.
  %i32 = alloca [32 x i32]
  %f32 = alloca [32 x float]
  %g = getelementptr i32, ptr %i32, i64 %o
  store i32 %x, ptr %g
  %h = getelementptr float, ptr %f32, i64 %p
  store float %f, ptr %h
  ret void
}

; The VA register currently ends up in VLA space. Let's hope that doesn't come up very often.
; Streaming SME function with shared ZA ("aarch64_inout_za"): the checks show
; d8-d15 spilled, a TPIDR2 lazy-save buffer set up (msr TPIDR2_EL0), the call
; to @other made with streaming mode off (smstop sm / smstart sm), and ZA
; conditionally restored afterwards via __arm_tpidr2_restore.
define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "target-features"="+sme" {
; CHECK0-LABEL: vastate:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 112
; CHECK0-NEXT:    cntd x9
; CHECK0-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK0-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
; CHECK0-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK0-NEXT:    add x29, sp, #64
; CHECK0-NEXT:    .cfi_def_cfa w29, 48
; CHECK0-NEXT:    .cfi_offset w19, -8
; CHECK0-NEXT:    .cfi_offset w20, -16
; CHECK0-NEXT:    .cfi_offset w30, -40
; CHECK0-NEXT:    .cfi_offset w29, -48
; CHECK0-NEXT:    .cfi_offset b8, -56
; CHECK0-NEXT:    .cfi_offset b9, -64
; CHECK0-NEXT:    .cfi_offset b10, -72
; CHECK0-NEXT:    .cfi_offset b11, -80
; CHECK0-NEXT:    .cfi_offset b12, -88
; CHECK0-NEXT:    .cfi_offset b13, -96
; CHECK0-NEXT:    .cfi_offset b14, -104
; CHECK0-NEXT:    .cfi_offset b15, -112
; CHECK0-NEXT:    sub sp, sp, #16
; CHECK0-NEXT:    rdsvl x8, #1
; CHECK0-NEXT:    mov x9, sp
; CHECK0-NEXT:    mov w20, w0
; CHECK0-NEXT:    msub x9, x8, x8, x9
; CHECK0-NEXT:    mov sp, x9
; CHECK0-NEXT:    stur x9, [x29, #-80]
; CHECK0-NEXT:    sub x9, x29, #80
; CHECK0-NEXT:    sturh wzr, [x29, #-70]
; CHECK0-NEXT:    stur wzr, [x29, #-68]
; CHECK0-NEXT:    sturh w8, [x29, #-72]
; CHECK0-NEXT:    msr TPIDR2_EL0, x9
; CHECK0-NEXT:    .cfi_offset vg, -32
; CHECK0-NEXT:    smstop sm
; CHECK0-NEXT:    bl other
; CHECK0-NEXT:    smstart sm
; CHECK0-NEXT:    .cfi_restore vg
; CHECK0-NEXT:    smstart za
; CHECK0-NEXT:    mrs x8, TPIDR2_EL0
; CHECK0-NEXT:    sub x0, x29, #80
; CHECK0-NEXT:    cbnz x8, .LBB33_2
; CHECK0-NEXT:  // %bb.1: // %entry
; CHECK0-NEXT:    bl __arm_tpidr2_restore
; CHECK0-NEXT:  .LBB33_2: // %entry
; CHECK0-NEXT:    mov w0, w20
; CHECK0-NEXT:    msr TPIDR2_EL0, xzr
; CHECK0-NEXT:    sub sp, x29, #64
; CHECK0-NEXT:    .cfi_def_cfa wsp, 112
; CHECK0-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d15, d14, [sp], #112 // 16-byte Folded Reload
; CHECK0-NEXT:    .cfi_def_cfa_offset 0
; CHECK0-NEXT:    .cfi_restore w19
; CHECK0-NEXT:    .cfi_restore w20
; CHECK0-NEXT:    .cfi_restore w30
; CHECK0-NEXT:    .cfi_restore w29
; CHECK0-NEXT:    .cfi_restore b8
; CHECK0-NEXT:    .cfi_restore b9
; CHECK0-NEXT:    .cfi_restore b10
; CHECK0-NEXT:    .cfi_restore b11
; CHECK0-NEXT:    .cfi_restore b12
; CHECK0-NEXT:    .cfi_restore b13
; CHECK0-NEXT:    .cfi_restore b14
; CHECK0-NEXT:    .cfi_restore b15
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: vastate:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    stp d15, d14, [sp, #-176]! // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 176
; CHECK64-NEXT:    cntd x9
; CHECK64-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x29, x30, [sp, #128] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x9, x20, [sp, #144] // 16-byte Folded Spill
; CHECK64-NEXT:    str x19, [sp, #160] // 8-byte Folded Spill
; CHECK64-NEXT:    mov x29, sp
; CHECK64-NEXT:    .cfi_def_cfa w29, 176
; CHECK64-NEXT:    .cfi_offset w19, -16
; CHECK64-NEXT:    .cfi_offset w20, -24
; CHECK64-NEXT:    .cfi_offset w30, -40
; CHECK64-NEXT:    .cfi_offset w29, -48
; CHECK64-NEXT:    .cfi_offset b8, -120
; CHECK64-NEXT:    .cfi_offset b9, -128
; CHECK64-NEXT:    .cfi_offset b10, -136
; CHECK64-NEXT:    .cfi_offset b11, -144
; CHECK64-NEXT:    .cfi_offset b12, -152
; CHECK64-NEXT:    .cfi_offset b13, -160
; CHECK64-NEXT:    .cfi_offset b14, -168
; CHECK64-NEXT:    .cfi_offset b15, -176
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    rdsvl x8, #1
; CHECK64-NEXT:    mov x9, sp
; CHECK64-NEXT:    mov w20, w0
; CHECK64-NEXT:    msub x9, x8, x8, x9
; CHECK64-NEXT:    mov sp, x9
; CHECK64-NEXT:    stur x9, [x29, #-80]
; CHECK64-NEXT:    sub x9, x29, #80
; CHECK64-NEXT:    sturh wzr, [x29, #-70]
; CHECK64-NEXT:    stur wzr, [x29, #-68]
; CHECK64-NEXT:    sturh w8, [x29, #-72]
; CHECK64-NEXT:    msr TPIDR2_EL0, x9
; CHECK64-NEXT:    .cfi_offset vg, -32
; CHECK64-NEXT:    smstop sm
; CHECK64-NEXT:    bl other
; CHECK64-NEXT:    smstart sm
; CHECK64-NEXT:    .cfi_restore vg
; CHECK64-NEXT:    smstart za
; CHECK64-NEXT:    mrs x8, TPIDR2_EL0
; CHECK64-NEXT:    sub x0, x29, #80
; CHECK64-NEXT:    cbnz x8, .LBB33_2
; CHECK64-NEXT:  // %bb.1: // %entry
; CHECK64-NEXT:    bl __arm_tpidr2_restore
; CHECK64-NEXT:  .LBB33_2: // %entry
; CHECK64-NEXT:    mov w0, w20
; CHECK64-NEXT:    msr TPIDR2_EL0, xzr
; CHECK64-NEXT:    mov sp, x29
; CHECK64-NEXT:    .cfi_def_cfa wsp, 176
; CHECK64-NEXT:    ldp x20, x19, [sp, #152] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr d14, [sp, #8] // 8-byte Folded Reload
; CHECK64-NEXT:    ldp x29, x30, [sp, #128] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr d15, [sp], #176 // 8-byte Folded Reload
; CHECK64-NEXT:    .cfi_def_cfa_offset 0
; CHECK64-NEXT:    .cfi_restore w19
; CHECK64-NEXT:    .cfi_restore w20
; CHECK64-NEXT:    .cfi_restore w30
; CHECK64-NEXT:    .cfi_restore w29
; CHECK64-NEXT:    .cfi_restore b8
; CHECK64-NEXT:    .cfi_restore b9
; CHECK64-NEXT:    .cfi_restore b10
; CHECK64-NEXT:    .cfi_restore b11
; CHECK64-NEXT:    .cfi_restore b12
; CHECK64-NEXT:    .cfi_restore b13
; CHECK64-NEXT:    .cfi_restore b14
; CHECK64-NEXT:    .cfi_restore b15
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: vastate:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1136
; CHECK1024-NEXT:    .cfi_def_cfa_offset 1136
; CHECK1024-NEXT:    cntd x9
; CHECK1024-NEXT:    stp d15, d14, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1088] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x30, [sp, #1096] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x9, [sp, #1104] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x28, [sp, #1112] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x20, [sp, #1120] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x19, [sp, #1128] // 8-byte Folded Spill
; CHECK1024-NEXT:    mov x29, sp
; CHECK1024-NEXT:    .cfi_def_cfa w29, 1136
; CHECK1024-NEXT:    .cfi_offset w19, -8
; CHECK1024-NEXT:    .cfi_offset w20, -16
; CHECK1024-NEXT:    .cfi_offset w28, -24
; CHECK1024-NEXT:    .cfi_offset w30, -40
; CHECK1024-NEXT:    .cfi_offset w29, -48
; CHECK1024-NEXT:    .cfi_offset b8, -1080
; CHECK1024-NEXT:    .cfi_offset b9, -1088
; CHECK1024-NEXT:    .cfi_offset b10, -1096
; CHECK1024-NEXT:    .cfi_offset b11, -1104
; CHECK1024-NEXT:    .cfi_offset b12, -1112
; CHECK1024-NEXT:    .cfi_offset b13, -1120
; CHECK1024-NEXT:    .cfi_offset b14, -1128
; CHECK1024-NEXT:    .cfi_offset b15, -1136
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    rdsvl x8, #1
; CHECK1024-NEXT:    mov x9, sp
; CHECK1024-NEXT:    mov w20, w0
; CHECK1024-NEXT:    msub x9, x8, x8, x9
; CHECK1024-NEXT:    mov sp, x9
; CHECK1024-NEXT:    sub x10, x29, #784
; CHECK1024-NEXT:    stur x9, [x10, #-256]
; CHECK1024-NEXT:    sub x9, x29, #774
; CHECK1024-NEXT:    sub x10, x29, #772
; CHECK1024-NEXT:    sturh wzr, [x9, #-256]
; CHECK1024-NEXT:    sub x9, x29, #1040
; CHECK1024-NEXT:    stur wzr, [x10, #-256]
; CHECK1024-NEXT:    sub x10, x29, #776
; CHECK1024-NEXT:    sturh w8, [x10, #-256]
; CHECK1024-NEXT:    msr TPIDR2_EL0, x9
; CHECK1024-NEXT:    .cfi_offset vg, -32
; CHECK1024-NEXT:    smstop sm
; CHECK1024-NEXT:    bl other
; CHECK1024-NEXT:    smstart sm
; CHECK1024-NEXT:    .cfi_restore vg
; CHECK1024-NEXT:    smstart za
; CHECK1024-NEXT:    mrs x8, TPIDR2_EL0
; CHECK1024-NEXT:    sub x0, x29, #1040
; CHECK1024-NEXT:    cbnz x8, .LBB33_2
; CHECK1024-NEXT:  // %bb.1: // %entry
; CHECK1024-NEXT:    bl __arm_tpidr2_restore
; CHECK1024-NEXT:  .LBB33_2: // %entry
; CHECK1024-NEXT:    mov w0, w20
; CHECK1024-NEXT:    msr TPIDR2_EL0, xzr
; CHECK1024-NEXT:    mov sp, x29
; CHECK1024-NEXT:    .cfi_def_cfa wsp, 1136
; CHECK1024-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x19, [sp, #1128] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x20, [sp, #1120] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x28, [sp, #1112] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x30, [sp, #1096] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1088] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldp d15, d14, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1136
; CHECK1024-NEXT:    .cfi_def_cfa_offset 0
; CHECK1024-NEXT:    .cfi_restore w19
; CHECK1024-NEXT:    .cfi_restore w20
; CHECK1024-NEXT:    .cfi_restore w28
; CHECK1024-NEXT:    .cfi_restore w30
; CHECK1024-NEXT:    .cfi_restore w29
; CHECK1024-NEXT:    .cfi_restore b8
; CHECK1024-NEXT:    .cfi_restore b9
; CHECK1024-NEXT:    .cfi_restore b10
; CHECK1024-NEXT:    .cfi_restore b11
; CHECK1024-NEXT:    .cfi_restore b12
; CHECK1024-NEXT:    .cfi_restore b13
; CHECK1024-NEXT:    .cfi_restore b14
; CHECK1024-NEXT:    .cfi_restore b15
; CHECK1024-NEXT:    ret
entry:
  tail call void @other()
  ret i32 %x
}
declare void @other()