; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=0 | FileCheck %s
declare void @use(ptr %p)
declare void @llvm.aarch64.settag(ptr %p, i64 %a)
declare void @llvm.aarch64.settag.zero(ptr %p, i64 %a)
; Two adjacent 16-byte tagged slots: the two settag calls are merged into a
; single st2g, and its post-index writeback also deallocates the 32-byte frame
; (no separate "add sp" in the epilogue).
define void @stg16_16() {
; CHECK-LABEL: stg16_16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: st2g sp, [sp], #32
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 16, align 16
%b = alloca i8, i32 16, align 16
call void @llvm.aarch64.settag(ptr %a, i64 16)
call void @llvm.aarch64.settag(ptr %b, i64 16)
ret void
}
; Four adjacent 16-byte tagged slots (64 bytes total) with a non-void return:
; merged into two st2g instructions, the last one folding the frame
; deallocation via post-index writeback. The return value (wzr) is set
; independently of the tagging sequence.
define i32 @stg16_16_16_16_ret() {
; CHECK-LABEL: stg16_16_16_16_ret:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: st2g sp, [sp, #32]
; CHECK-NEXT: st2g sp, [sp], #64
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 16, align 16
%b = alloca i8, i32 16, align 16
%c = alloca i8, i32 16, align 16
%d = alloca i8, i32 16, align 16
call void @llvm.aarch64.settag(ptr %a, i64 16)
call void @llvm.aarch64.settag(ptr %b, i64 16)
call void @llvm.aarch64.settag(ptr %c, i64 16)
call void @llvm.aarch64.settag(ptr %d, i64 16)
ret i32 0
}
; Same as @stg16_16_16_16_ret but returning void: four 16-byte slots merge
; into two st2g instructions, with the second one's writeback restoring SP.
define void @stg16_16_16_16() {
; CHECK-LABEL: stg16_16_16_16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: st2g sp, [sp, #32]
; CHECK-NEXT: st2g sp, [sp], #64
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 16, align 16
%b = alloca i8, i32 16, align 16
%c = alloca i8, i32 16, align 16
%d = alloca i8, i32 16, align 16
call void @llvm.aarch64.settag(ptr %a, i64 16)
call void @llvm.aarch64.settag(ptr %b, i64 16)
call void @llvm.aarch64.settag(ptr %c, i64 16)
call void @llvm.aarch64.settag(ptr %d, i64 16)
ret void
}
; Four adjacent 128-byte tagged slots (512 bytes total): too large for
; straight-line st2g sequences, so the merged settag is lowered to a loop
; tagging 32 bytes per iteration, with the post-indexed st2g also walking SP
; back up (the loop itself deallocates the frame).
define void @stg128_128_128_128() {
; CHECK-LABEL: stg128_128_128_128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #512
; CHECK-NEXT: .cfi_def_cfa_offset 528
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #512 // =0x200
; CHECK-NEXT: .LBB3_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: st2g sp, [sp], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB3_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 128, align 16
%b = alloca i8, i32 128, align 16
%c = alloca i8, i32 128, align 16
%d = alloca i8, i32 128, align 16
call void @llvm.aarch64.settag(ptr %a, i64 128)
call void @llvm.aarch64.settag(ptr %b, i64 128)
call void @llvm.aarch64.settag(ptr %c, i64 128)
call void @llvm.aarch64.settag(ptr %d, i64 128)
ret void
}
; Mixed sizes (16 + 512 + 16 = 544 bytes), all adjacent: the three settag
; calls are merged into a single 544-byte tagging loop that also restores SP
; through the st2g post-index writeback.
define void @stg16_512_16() {
; CHECK-LABEL: stg16_512_16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #544
; CHECK-NEXT: .cfi_def_cfa_offset 560
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #544 // =0x220
; CHECK-NEXT: .LBB4_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: st2g sp, [sp], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB4_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 16, align 16
%b = alloca i8, i32 512, align 16
%c = alloca i8, i32 16, align 16
call void @llvm.aarch64.settag(ptr %a, i64 16)
call void @llvm.aarch64.settag(ptr %b, i64 512)
call void @llvm.aarch64.settag(ptr %c, i64 16)
ret void
}
; Three adjacent 512-byte tagged slots (1536 bytes total): merged into a
; single tagging loop over the whole region, SP restored by the writeback.
define void @stg512_512_512() {
; CHECK-LABEL: stg512_512_512:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1536
; CHECK-NEXT: .cfi_def_cfa_offset 1552
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #1536 // =0x600
; CHECK-NEXT: .LBB5_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: st2g sp, [sp], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB5_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 512, align 16
%b = alloca i8, i32 512, align 16
%c = alloca i8, i32 512, align 16
call void @llvm.aarch64.settag(ptr %a, i64 512)
call void @llvm.aarch64.settag(ptr %b, i64 512)
call void @llvm.aarch64.settag(ptr %c, i64 512)
ret void
}
; Settag calls split across basic blocks: %a/%b are tagged only on the
; conditional path (plain st2g, no writeback, since SP must survive into
; %if.end), while %c's unconditional tag in %if.end still folds the frame
; deallocation into its final st2g writeback.
define void @early(i1 %flag) {
; CHECK-LABEL: early:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #144
; CHECK-NEXT: .cfi_def_cfa_offset 144
; CHECK-NEXT: tbz w0, #0, .LBB6_2
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: st2g sp, [sp, #48]
; CHECK-NEXT: st2g sp, [sp, #80]
; CHECK-NEXT: st2g sp, [sp, #112]
; CHECK-NEXT: .LBB6_2: // %if.end
; CHECK-NEXT: stg sp, [sp, #32]
; CHECK-NEXT: st2g sp, [sp], #144
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 48, align 16
%b = alloca i8, i32 48, align 16
%c = alloca i8, i32 48, align 16
br i1 %flag, label %if.then, label %if.end
if.then:
call void @llvm.aarch64.settag(ptr %a, i64 48)
call void @llvm.aarch64.settag(ptr %b, i64 48)
br label %if.end
if.end:
call void @llvm.aarch64.settag(ptr %c, i64 48)
ret void
}
; Conditional tagging of a 256-byte region: the %if.then loop cannot update
; SP (the frame is still live in %if.end), so it walks a scratch pointer (x9)
; instead; the unconditional tag of %c in %if.end still folds part of the
; frame deallocation into its st2g writeback.
define void @early_128_128(i1 %flag) {
; CHECK-LABEL: early_128_128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #320
; CHECK-NEXT: str x29, [sp, #304] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: tbz w0, #0, .LBB7_4
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: add x9, sp, #48
; CHECK-NEXT: mov x8, #256 // =0x100
; CHECK-NEXT: .LBB7_2: // %if.then
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: st2g x9, [x9], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB7_2
; CHECK-NEXT: // %bb.3: // %if.then
; CHECK-NEXT: .LBB7_4: // %if.end
; CHECK-NEXT: stg sp, [sp, #32]
; CHECK-NEXT: st2g sp, [sp], #304
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 128, align 16
%b = alloca i8, i32 128, align 16
%c = alloca i8, i32 48, align 16
br i1 %flag, label %if.then, label %if.end
if.then:
call void @llvm.aarch64.settag(ptr %a, i64 128)
call void @llvm.aarch64.settag(ptr %b, i64 128)
br label %if.end
if.end:
call void @llvm.aarch64.settag(ptr %c, i64 48)
ret void
}
; Same shape as @early_128_128 with a 1024-byte conditional region: the
; %if.then loop tags via a scratch pointer (x9) so SP is preserved for
; %if.end, where the final st2g's writeback deallocates the frame.
define void @early_512_512(i1 %flag) {
; CHECK-LABEL: early_512_512:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1072
; CHECK-NEXT: .cfi_def_cfa_offset 1088
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: tbz w0, #0, .LBB8_4
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: add x9, sp, #48
; CHECK-NEXT: mov x8, #1024 // =0x400
; CHECK-NEXT: .LBB8_2: // %if.then
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: st2g x9, [x9], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB8_2
; CHECK-NEXT: // %bb.3: // %if.then
; CHECK-NEXT: .LBB8_4: // %if.end
; CHECK-NEXT: stg sp, [sp, #32]
; CHECK-NEXT: st2g sp, [sp], #1072
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 512, align 16
%b = alloca i8, i32 512, align 16
%c = alloca i8, i32 48, align 16
br i1 %flag, label %if.then, label %if.end
if.then:
call void @llvm.aarch64.settag(ptr %a, i64 512)
call void @llvm.aarch64.settag(ptr %b, i64 512)
br label %if.end
if.end:
call void @llvm.aarch64.settag(ptr %c, i64 48)
ret void
}
; Two loops of size 256; the second loop updates SP.
; Two 256-byte tagged regions separated by an untagged 32-byte slot (%b,
; escaped to @use, so it must not be tagged): the settag calls merge into two
; separate loops. The first tags via scratch pointer x9; the second tags
; through SP with post-index writeback, so it doubles as the frame
; deallocation (note the intervening "add sp, sp, #288" to skip past the
; first region and the gap).
define void @stg128_128_gap_128_128() {
; CHECK-LABEL: stg128_128_gap_128_128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #544
; CHECK-NEXT: .cfi_def_cfa_offset 560
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: add x0, sp, #256
; CHECK-NEXT: bl use
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov x8, #256 // =0x100
; CHECK-NEXT: .LBB9_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: st2g x9, [x9], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB9_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: add sp, sp, #288
; CHECK-NEXT: mov x8, #256 // =0x100
; CHECK-NEXT: .LBB9_3: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: st2g sp, [sp], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB9_3
; CHECK-NEXT: // %bb.4: // %entry
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 128, align 16
%a2 = alloca i8, i32 128, align 16
%b = alloca i8, i32 32, align 16
%c = alloca i8, i32 128, align 16
%c2 = alloca i8, i32 128, align 16
call void @use(ptr %b)
call void @llvm.aarch64.settag(ptr %a, i64 128)
call void @llvm.aarch64.settag(ptr %a2, i64 128)
call void @llvm.aarch64.settag(ptr %c, i64 128)
call void @llvm.aarch64.settag(ptr %c2, i64 128)
ret void
}
; Function Attrs: nounwind
declare i32 @printf(ptr, ...) #0
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
; Case 1: the insertion point of the merged tag-store sequence is followed by
; a read of the NZCV flags (the cmp/b.ge pair), and the tagging loop's "subs"
; would clobber them — so the pass must not merge here.
; The icmp between the settag calls compiles to cmp/b.ge, which reads NZCV
; after the point where a merged tagging loop would be inserted — and the
; loop's "subs" clobbers NZCV. The pass must therefore NOT fold %c's tag into
; the loop: a separate trailing "stg sp, [sp]" is expected after the cmp.
; (CHECK lines here are hand-written, not autogenerated.)
define i32 @nzcv_clobber(i32 %in) {
entry:
; CHECK-LABEL: nzcv_clobber:
; CHECK: stg sp, [sp, #528]
; CHECK-NEXT: .LBB10_1:
; CHECK: st2g x9, [x9], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB10_1
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: cmp w0, #10
; CHECK-NEXT: stg sp, [sp]
; CHECK-NEXT: b.ge .LBB10_4
%a = alloca i8, i32 16, align 16
%b = alloca i8, i32 512, align 16
%c = alloca i8, i32 16, align 16
call void @llvm.aarch64.settag(ptr %a, i64 16)
call void @llvm.aarch64.settag(ptr %b, i64 512)
%cmp = icmp slt i32 %in, 10
call void @llvm.aarch64.settag(ptr %c, i64 16)
br i1 %cmp, label %return0, label %return1
return0: ; preds = %entry
%call = call i32 (ptr, ...) @printf(ptr @.str, i32 10) #1
ret i32 0
return1:
ret i32 1
}
; Case 2: the insertion point of the merged tag-store sequence is not
; followed by any NZCV read, so merging into a single flag-clobbering loop is
; safe and expected.
; Counterpart to @nzcv_clobber: no NZCV read follows the merge insertion
; point, so all three settag calls (16 + 512 + 16 = 544 bytes) are expected
; to merge into a single tagging loop whose "subs" may freely clobber flags.
define i32 @nzcv_no_clobber(i32 %in) {
entry:
; CHECK-LABEL: nzcv_no_clobber:
; CHECK: mov x8, #544
; CHECK-NEXT: .LBB11_1:
; CHECK: st2g sp, [sp], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB11_1
%a = alloca i8, i32 16, align 16
%b = alloca i8, i32 512, align 16
%c = alloca i8, i32 16, align 16
call void @llvm.aarch64.settag(ptr %a, i64 16)
call void @llvm.aarch64.settag(ptr %b, i64 512)
call void @llvm.aarch64.settag(ptr %c, i64 16)
br label %return1
return0: ; preds = %entry
%call = call i32 (ptr, ...) @printf(ptr @.str, i32 10) #1
ret i32 0
return1:
ret i32 1
}