llvm/llvm/test/CodeGen/AArch64/settag-merge-order.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=1 | FileCheck %s

; External sink: both tests below pass it the untagged 32-byte object %b.
declare void @use(ptr %p)
; Tagging intrinsic called by both tests; the assertions below show the calls
; lowered to st2g loops.
declare void @llvm.aarch64.settag(ptr %p, i64 %a)
; Declared here but not called by the two functions visible in this file.
declare void @llvm.aarch64.settag.zero(ptr %p, i64 %a)

; Two tagging loops of 256 bytes each; the second loop updates SP.
; After frame reordering, the two loops can be merged into one.
; Expected result, as read off the assertions below: the frame reserves 544
; bytes of locals (4 x 128 tagged bytes plus 32 bytes for %b), %b is addressed
; at sp+512, and a single st2g loop tags 512 bytes starting at sp. That loop
; post-increments sp itself, so the epilogue only releases the remaining 32
; bytes before reloading x29/x30.
define void @stg128_128_gap_128_128() {
; CHECK-LABEL: stg128_128_gap_128_128:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #544
; CHECK-NEXT:    .cfi_def_cfa_offset 560
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    add x0, sp, #512
; CHECK-NEXT:    bl use
; CHECK-NEXT:    mov x8, #512 // =0x200
; CHECK-NEXT:  .LBB0_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g sp, [sp], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB0_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    add sp, sp, #32
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  ; Four 128-byte objects that get tagged (%a, %a2, %c, %c2), with the
  ; untagged 32-byte object %b declared in the middle (the "gap" in the name).
  %a = alloca i8, i32 128, align 16
  %a2 = alloca i8, i32 128, align 16
  %b = alloca i8, i32 32, align 16
  %c = alloca i8, i32 128, align 16
  %c2 = alloca i8, i32 128, align 16
  ; %b is only handed to the external @use; it is never tagged.
  call void @use(ptr %b)
  ; Tag all four 128-byte objects back to back (4 x 128 = 512 bytes total).
  call void @llvm.aarch64.settag(ptr %a, i64 128)
  call void @llvm.aarch64.settag(ptr %a2, i64 128)
  call void @llvm.aarch64.settag(ptr %c, i64 128)
  call void @llvm.aarch64.settag(ptr %c2, i64 128)
  ret void
}

; Same five-object shape as the test above, but the tagging is spread over
; control flow: if.then tags %a/%a2 (2 x 160 bytes), if.else tags %c/%c2
; (2 x 128 bytes), and if.end tags all four objects.
;
; Expected result, as read off the assertions below:
;   - 608 bytes of locals (320 + 256 tagged bytes plus 32 for %b), with %b
;     addressed at sp+576;
;   - if.then: one st2g loop over 320 bytes starting at sp+256, via x9;
;   - if.else: one st2g loop over 256 bytes starting at sp, via x9;
;   - if.end:  one st2g loop over all 576 bytes starting at sp that
;     post-increments sp itself, so the epilogue only releases the remaining
;     32 bytes.
; %flag is copied to w19 before the call to @use and tested afterwards.
define void @stg2(i1 %flag) {
; CHECK-LABEL: stg2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #608
; CHECK-NEXT:    .cfi_def_cfa_offset 640
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    .cfi_offset w29, -32
; CHECK-NEXT:    mov w19, w0
; CHECK-NEXT:    add x0, sp, #576
; CHECK-NEXT:    bl use
; CHECK-NEXT:    tbz w19, #0, .LBB1_4
; CHECK-NEXT:  // %bb.1: // %if.then
; CHECK-NEXT:    add x9, sp, #256
; CHECK-NEXT:    mov x8, #320 // =0x140
; CHECK-NEXT:  .LBB1_2: // %if.then
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g x9, [x9], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB1_2
; CHECK-NEXT:  // %bb.3: // %if.then
; CHECK-NEXT:    b .LBB1_7
; CHECK-NEXT:  .LBB1_4: // %if.else
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mov x8, #256 // =0x100
; CHECK-NEXT:  .LBB1_5: // %if.else
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g x9, [x9], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB1_5
; CHECK-NEXT:  // %bb.6: // %if.else
; CHECK-NEXT:  .LBB1_7: // %if.end
; CHECK-NEXT:    mov x8, #576 // =0x240
; CHECK-NEXT:  .LBB1_8: // %if.end
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g sp, [sp], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB1_8
; CHECK-NEXT:  // %bb.9: // %if.end
; CHECK-NEXT:    add sp, sp, #32
; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x29, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  ; Two 160-byte and two 128-byte objects that get tagged, with the untagged
  ; 32-byte object %b declared between the two pairs.
  %a = alloca i8, i32 160, align 16
  %a2 = alloca i8, i32 160, align 16
  %b = alloca i8, i32 32, align 16
  %c = alloca i8, i32 128, align 16
  %c2 = alloca i8, i32 128, align 16
  ; %b is only handed to the external @use; it is never tagged.
  call void @use(ptr %b)
  br i1 %flag, label %if.then, label %if.else

if.then:
  ; Tags only the 160-byte pair (2 x 160 = 320 bytes).
  call void @llvm.aarch64.settag(ptr %a, i64 160)
  call void @llvm.aarch64.settag(ptr %a2, i64 160)
  br label %if.end

if.else:
  ; Tags only the 128-byte pair (2 x 128 = 256 bytes).
  call void @llvm.aarch64.settag(ptr %c, i64 128)
  call void @llvm.aarch64.settag(ptr %c2, i64 128)
  br label %if.end

if.end:
  ; Tags all four objects (320 + 256 = 576 bytes) right before returning.
  call void @llvm.aarch64.settag(ptr %a, i64 160)
  call void @llvm.aarch64.settag(ptr %a2, i64 160)
  call void @llvm.aarch64.settag(ptr %c, i64 128)
  call void @llvm.aarch64.settag(ptr %c2, i64 128)

  ret void
}